From 2509f1855496280c562f646386d5c0f51073e8f2 Mon Sep 17 00:00:00 2001 From: Bartek Date: Thu, 29 Oct 2020 12:54:20 +0100 Subject: [PATCH] EZP-31287: Added FullText search Fields for Email and ISBN (#3023) --- .../FieldType/EmailAddressIntegrationTest.php | 9 +++- .../Tests/FieldType/ISBNIntegrationTest.php | 7 +++ .../FieldType/EmailAddress/SearchField.php | 8 ++++ .../Core/FieldType/ISBN/SearchField.php | 5 ++ .../Gateway/CriterionHandler/FullText.php | 3 ++ .../Search/Legacy/Content/FullTextValue.php | 14 ++++++ .../Legacy/Content/Mapper/FullTextMapper.php | 48 +++++++++---------- .../WordIndexer/Gateway/DoctrineDatabase.php | 9 +++- .../SPI/Search/FieldType/FullTextField.php | 22 +++++++++ 9 files changed, 96 insertions(+), 29 deletions(-) diff --git a/eZ/Publish/API/Repository/Tests/FieldType/EmailAddressIntegrationTest.php b/eZ/Publish/API/Repository/Tests/FieldType/EmailAddressIntegrationTest.php index 009f5fa198..146fa1c0fd 100644 --- a/eZ/Publish/API/Repository/Tests/FieldType/EmailAddressIntegrationTest.php +++ b/eZ/Publish/API/Repository/Tests/FieldType/EmailAddressIntegrationTest.php @@ -353,7 +353,7 @@ protected function getSearchTargetValueOne() protected function getValidSearchValueTwo() { - return 'wyoming.knott@ez.no'; + return 'wyoming.knott@o2.ru'; } protected function getSearchTargetValueTwo() @@ -361,4 +361,11 @@ protected function getSearchTargetValueTwo() // ensure case-insensitivity return strtoupper($this->getValidSearchValueTwo()); } + + protected function getFullTextIndexedFieldData() + { + return [ + ['holmes4@ez.no', 'wyoming.knott@o2.ru'], + ]; + } } diff --git a/eZ/Publish/API/Repository/Tests/FieldType/ISBNIntegrationTest.php b/eZ/Publish/API/Repository/Tests/FieldType/ISBNIntegrationTest.php index 6653ad770c..c03896122d 100644 --- a/eZ/Publish/API/Repository/Tests/FieldType/ISBNIntegrationTest.php +++ b/eZ/Publish/API/Repository/Tests/FieldType/ISBNIntegrationTest.php @@ -358,4 +358,11 @@ protected function getValidSearchValueTwo() { return '9780380448340'; } + + protected function getFullTextIndexedFieldData() + { + return [ + ['9780099067504', '9780380448340'], + ]; + } } diff --git a/eZ/Publish/Core/FieldType/EmailAddress/SearchField.php b/eZ/Publish/Core/FieldType/EmailAddress/SearchField.php index fcfeebb5e0..91944b23fa 100644 --- a/eZ/Publish/Core/FieldType/EmailAddress/SearchField.php +++ b/eZ/Publish/Core/FieldType/EmailAddress/SearchField.php @@ -32,6 +32,14 @@ public function getIndexData(Field $field, FieldDefinition $fieldDefinition) $field->value->data, new Search\FieldType\StringField() ), + new Search\Field( + 'fulltext', + $field->value->data, + new Search\FieldType\FullTextField([ + 'space_normalize', + 'latin1_lowercase', + ], false) + ), ]; } diff --git a/eZ/Publish/Core/FieldType/ISBN/SearchField.php b/eZ/Publish/Core/FieldType/ISBN/SearchField.php index ff0c435f4c..e7f82b5811 100644 --- a/eZ/Publish/Core/FieldType/ISBN/SearchField.php +++ b/eZ/Publish/Core/FieldType/ISBN/SearchField.php @@ -32,6 +32,11 @@ public function getIndexData(Field $field, FieldDefinition $fieldDefinition) $field->value->data, new Search\FieldType\StringField() ), + new Search\Field( + 'fulltext', + $field->value->data, + new Search\FieldType\FullTextField() + ), ]; } diff --git a/eZ/Publish/Core/Search/Legacy/Content/Common/Gateway/CriterionHandler/FullText.php b/eZ/Publish/Core/Search/Legacy/Content/Common/Gateway/CriterionHandler/FullText.php index b3d37a432b..08b58d9056 100644 --- a/eZ/Publish/Core/Search/Legacy/Content/Common/Gateway/CriterionHandler/FullText.php +++ b/eZ/Publish/Core/Search/Legacy/Content/Common/Gateway/CriterionHandler/FullText.php @@ -196,6 +196,9 @@ protected function getWordIdSubquery(SelectQuery $query, $string) $wordExpressions[] = $this->getWordExpression($subQuery, $token); } + // Search for provided string itself as well + $wordExpressions[] = $this->getWordExpression($subQuery, $string); + $whereCondition = $subQuery->expr->lOr($wordExpressions); // If stop word threshold is below 100%, make it part of $whereCondition diff --git a/eZ/Publish/Core/Search/Legacy/Content/FullTextValue.php b/eZ/Publish/Core/Search/Legacy/Content/FullTextValue.php index c6e39805d9..4c3215871a 100644 --- a/eZ/Publish/Core/Search/Legacy/Content/FullTextValue.php +++ b/eZ/Publish/Core/Search/Legacy/Content/FullTextValue.php @@ -50,4 +50,18 @@ class FullTextValue extends ValueObject * @var bool */ public $isMainAndAlwaysAvailable; + + /** + * Array of rules to be used when transforming the value. + * + * @var array + */ + public $transformationRules; + + /** + * Flag whether the value should be split by non-words. + * + * @var bool + */ + public $splitFlag; } diff --git a/eZ/Publish/Core/Search/Legacy/Content/Mapper/FullTextMapper.php b/eZ/Publish/Core/Search/Legacy/Content/Mapper/FullTextMapper.php index e0ab8444e0..8310a85472 100644 --- a/eZ/Publish/Core/Search/Legacy/Content/Mapper/FullTextMapper.php +++ b/eZ/Publish/Core/Search/Legacy/Content/Mapper/FullTextMapper.php @@ -10,6 +10,7 @@ use eZ\Publish\Core\Search\Legacy\Content\FullTextData; use eZ\Publish\SPI\Persistence\Content; use eZ\Publish\SPI\Persistence\Content\Type; +use eZ\Publish\SPI\Search\Field; use eZ\Publish\SPI\Search\FieldType; use eZ\Publish\SPI\Persistence\Content\Type\Handler as ContentTypeHandler; use eZ\Publish\Core\Search\Legacy\Content\FullTextValue; @@ -73,8 +74,10 @@ public function mapContent(Content $content) * @param \eZ\Publish\SPI\Persistence\Content $content * * @return \eZ\Publish\Core\Search\Legacy\Content\FullTextValue[] + * + * @throws \eZ\Publish\API\Repository\Exceptions\NotFoundException */ - protected function getFullTextValues(Content $content) + protected function getFullTextValues(Content $content): array { $fullTextValues = []; foreach ($content->fields as $field) { @@ -85,10 +88,13 @@ protected function getFullTextValues(Content $content) continue; } - $value = $this->getFullTextFieldValue($field, $fieldDefinition); - if (empty($value)) { + $fullTextField = $this->extractFullTextField($field, $fieldDefinition); + if (null === $fullTextField || empty($fullTextField->value)) { continue; } + $fullTextValue = !is_array($fullTextField->value) + ? $fullTextField->value + : implode(' ', $fullTextField->value); $contentInfo = $content->versionInfo->contentInfo; $fullTextValues[] = new FullTextValue( @@ -97,10 +103,12 @@ protected function getFullTextValues(Content $content) 'fieldDefinitionId' => $field->fieldDefinitionId, 'fieldDefinitionIdentifier' => $fieldDefinition->identifier, 'languageCode' => $field->languageCode, - 'value' => !is_array($value) ? $value : implode(' ', $value), + 'value' => $fullTextValue, 'isMainAndAlwaysAvailable' => ( $field->languageCode === $contentInfo->mainLanguageCode && $contentInfo->alwaysAvailable ), + 'transformationRules' => $fullTextField->type->transformationRules, + 'splitFlag' => $fullTextField->type->splitFlag, ] ); } @@ -108,30 +116,18 @@ protected function getFullTextValues(Content $content) return $fullTextValues; } - /** - * Get FullTextField value. - * - * @param Content\Field $field - * @param Type\FieldDefinition $fieldDefinition - * - * @return string - */ - private function getFullTextFieldValue(Content\Field $field, Type\FieldDefinition $fieldDefinition) - { + private function extractFullTextField( + Content\Field $field, + Type\FieldDefinition $fieldDefinition + ): ?Field { $fieldType = $this->fieldRegistry->getType($field->type); - $indexFields = $fieldType->getIndexData($field, $fieldDefinition); - - // find value to be returned (stored in FullTextField) - $fullTextFieldValue = ''; - foreach ($indexFields as $field) { - /** @var \eZ\Publish\SPI\Search\Field $field */ - if ($field->type instanceof FieldType\FullTextField) { - $fullTextFieldValue = $field->value; - break; + $fullTextFields = array_filter( + $fieldType->getIndexData($field, $fieldDefinition), + static function ($indexField) { + return $indexField->type instanceof FieldType\FullTextField; } - } + ); - // some full text fields are stored as an array of strings - return !is_array($fullTextFieldValue) ? $fullTextFieldValue : implode(' ', $fullTextFieldValue); + return !empty($fullTextFields) ? array_values($fullTextFields)[0] : null; } } diff --git a/eZ/Publish/Core/Search/Legacy/Content/WordIndexer/Gateway/DoctrineDatabase.php b/eZ/Publish/Core/Search/Legacy/Content/WordIndexer/Gateway/DoctrineDatabase.php index 93552308b0..71f894077f 100644 --- a/eZ/Publish/Core/Search/Legacy/Content/WordIndexer/Gateway/DoctrineDatabase.php +++ b/eZ/Publish/Core/Search/Legacy/Content/WordIndexer/Gateway/DoctrineDatabase.php @@ -128,9 +128,14 @@ public function index(FullTextData $fullTextData) } else { $integerValue = 0; } - $text = $this->transformationProcessor->transform($fullTextValue->value, $this->fullTextSearchConfiguration['commands']); + $text = $this->transformationProcessor->transform( + $fullTextValue->value, + !empty($fullTextValue->transformationRules) + ? $fullTextValue->transformationRules + : $this->fullTextSearchConfiguration['commands'] + ); // split by non-words - $wordArray = preg_split('/\W/u', $text, -1, PREG_SPLIT_NO_EMPTY); + $wordArray = $fullTextValue->splitFlag ? preg_split('/\W/u', $text, -1, PREG_SPLIT_NO_EMPTY) : [$text]; foreach ($wordArray as $word) { if (trim($word) === '') { continue; diff --git a/eZ/Publish/SPI/Search/FieldType/FullTextField.php b/eZ/Publish/SPI/Search/FieldType/FullTextField.php index 16663bac6a..863b92ffda 100644 --- a/eZ/Publish/SPI/Search/FieldType/FullTextField.php +++ b/eZ/Publish/SPI/Search/FieldType/FullTextField.php @@ -19,4 +19,26 @@ class FullTextField extends FieldType * @var string */ protected $type = 'ez_fulltext'; + + /** + * Transformation rules to be used when transforming the given string. + * + * @var array + */ + public $transformationRules; + + /** + * Flag whether the string should be split by non-words. + * + * @var bool + */ + public $splitFlag; + + public function __construct(array $transformationRules = [], bool $splitFlag = true) + { + $this->transformationRules = $transformationRules; + $this->splitFlag = $splitFlag; + + parent::__construct(); + } }