Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] Allow changing language stemmer when possible #3361

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php
/**
* DISCLAIMER
*
* Do not edit or add to this file if you wish to upgrade this module to newer versions in the future.
*
* @category Smile
* @package Smile\ElasticsuiteCore
* @author Richard BAYET <richard.bayet@smile.fr>
* @copyright 2024 Smile
* @license Open Software License ("OSL") v. 3.0
*/

namespace Smile\ElasticsuiteCore\Block\Adminhtml\System\Config;

use Magento\Config\Block\System\Config\Form\Field;
use Magento\Framework\Data\Form\Element\AbstractElement;

/**
 * Comment-only row for the system configuration form.
 * Renders the element label and comment in a single full-width row, without the usual
 * field label column and scope input.
 * This makes it possible to show a comment at the default scope for fields that are
 * only available at the store view level.
 *
 * @category Smile
 * @package  Smile\ElasticsuiteCore
 */
class Comment extends Field
{
    /**
     * Build the complete row markup (title + comment content) for the given form element.
     *
     * @param AbstractElement $element Form element providing the label, the comment and the HTML id.
     *
     * @return string
     */
    public function render(\Magento\Framework\Data\Form\Element\AbstractElement $element)
    {
        // Label first, then comment: both are emitted as-is inside their own wrapper.
        $titleHtml   = '<div class="elasticsuite config-comment-title">' . $element->getLabel() . '</div>';
        $contentHtml = '<div class="elasticsuite config-comment-content">' . $element->getComment() . '</div>';

        return $this->decorateRowHtml($element, $titleHtml . $contentHtml);
    }

    /**
     * Wrap the given markup into a table row spanning the three columns of the config form.
     *
     * @param AbstractElement $element Form element (its HTML id is used to build the row id).
     * @param string          $html    Inner markup of the row.
     *
     * @return string
     */
    private function decorateRowHtml(\Magento\Framework\Data\Form\Element\AbstractElement $element, $html)
    {
        $rowTemplate = '<tr id="row_%s"><td colspan="3"><div class="elasticsuite config-comment">%s</div></td></tr>';

        return sprintf($rowTemplate, $element->getHtmlId(), $html);
    }
}
39 changes: 39 additions & 0 deletions src/module-elasticsuite-core/Helper/IndexSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ class IndexSettings extends AbstractConfiguration
*/
const REFERENCE_ANALYZER_CONFIG_XML_PREFIX = 'reference_analyzer';

/**
 * Config path suffix of the "use default stemmer" flag.
 * Appended to ANALYSIS_CONFIG_XML_PREFIX by hasCustomLanguageStemmer() to build the full path.
 *
 * @var string
 */
const STEMMER_USE_DEFAULT_CONFIG_XML_PATH = 'stemmer/use_default';

/**
 * Config path suffix of the custom stemmer value.
 * Appended to ANALYSIS_CONFIG_XML_PREFIX by getCustomLanguageStemmer() to build the full path.
 *
 * @var string
 */
const STEMMER_CUSTOM_STEMMER_CONFIG_XML_PATH = 'stemmer/custom';

/**
* @var string
*/
Expand Down Expand Up @@ -406,6 +416,35 @@ public function getReferenceAnalyzerConfigFlag($configFlag, $store)
return $this->scopeConfig->isSetFlag($path, ScopeInterface::SCOPE_STORE, $store);
}


/**
 * Tell whether the given store uses a non-default language stemmer,
 * i.e. whether its "use default stemmer" config flag is not set.
 *
 * @param integer|string|StoreInterface $store Store.
 *
 * @return bool
 */
public function hasCustomLanguageStemmer($store)
{
    $configPath = implode(
        '/',
        [self::ANALYSIS_CONFIG_XML_PREFIX, self::STEMMER_USE_DEFAULT_CONFIG_XML_PATH]
    );
    $usesDefaultStemmer = $this->scopeConfig->isSetFlag($configPath, ScopeInterface::SCOPE_STORE, $store);

    return ($usesDefaultStemmer === false);
}

/**
 * Read the custom stemmer configured for the given store.
 * The raw config value is returned untouched: no check is made here that a custom
 * stemmer is actually enabled for the store.
 *
 * @param integer|string|StoreInterface $store Store.
 *
 * @return mixed
 */
public function getCustomLanguageStemmer($store)
{
    $configPath = sprintf(
        '%s/%s',
        self::ANALYSIS_CONFIG_XML_PREFIX,
        self::STEMMER_CUSTOM_STEMMER_CONFIG_XML_PATH
    );

    return $this->scopeConfig->getValue($configPath, ScopeInterface::SCOPE_STORE, $store);
}

/**
* Retrieve a configuration value by its key.
*
Expand Down
130 changes: 130 additions & 0 deletions src/module-elasticsuite-core/Index/Analysis/Config/Converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class Converter implements \Magento\Framework\Config\ConverterInterface
const ANALYZER_TYPE_NODE = 'analyzer';
const NORMALIZER_TYPE_ROOT_NODE = 'normalizers';
const NORMALIZER_TYPE_NODE = 'normalizer';
// Stemmers container node name; also the key under which stemmer options are stored in the default configuration.
const STEMMER_TYPE_ROOT_NODE = 'stemmers';
// Node name of a per-language stemmers group (read with its 'language' and 'title' attributes).
const STEMMER_GROUP_TYPE_NODE = 'group';
// Node name of a single stemmer definition inside a group.
const STEMMER_TYPE_NODE = 'stemmer';
const LANGUAGE_DEFAULT = 'default';

/**
Expand Down Expand Up @@ -71,6 +74,11 @@ public function convert($source)
$configuration[$language] = $this->getLanguageConfiguration($xpath, $language, $defaultConfig);
}

$configuration['default'][self::STEMMER_TYPE_ROOT_NODE] = $this->getAllStemmersOptions(
$xpath,
$this->getAllDefaultLanguageStemmers($xpath)
);

return $configuration;
}

Expand Down Expand Up @@ -339,4 +347,126 @@ private function getFiltersByRef(\DOMXPath $xpath, \DomNode $rootNode, $searchPa

return $filters;
}

/**
 * Collect the default stemmer of every language, as currently declared in the config.
 * Relies on the fact that the filter used is of type "stemmer" and named "stemmer":
 * the value of its first <language> child element is the default stemmer.
 *
 * @param \DOMXPath $xpath XPath access to the document parsed.
 *
 * @return array Default stemmer indexed by the "language" attribute of the filter node.
 */
private function getAllDefaultLanguageStemmers(\DOMXPath $xpath)
{
    $stemmersByLanguage = [];

    $query = sprintf(
        "/%s/%s/%s[%s]",
        self::ROOT_NODE_NAME,
        self::FILTER_TYPE_ROOT_NODE,
        self::FILTER_TYPE_NODE,
        "@type='stemmer' and @name='stemmer'"
    );

    foreach ($xpath->query($query) as $filterNode) {
        $stemmerName = false;

        // Only the first <language> element child is taken into account.
        foreach ($filterNode->childNodes as $child) {
            if (($child instanceof \DOMElement) && ($child->tagName === 'language')) {
                $stemmerName = $child->nodeValue;
                break;
            }
        }

        // Filters without a usable <language> value do not define a default stemmer.
        if (empty($stemmerName)) {
            continue;
        }

        $stemmersByLanguage[$filterNode->getAttribute('language')] = $stemmerName;
    }

    return $stemmersByLanguage;
}

/**
 * Parse all stemmers options available for languages that support multiple stemmers.
 *
 * Each stemmers group node produces one entry, indexed by its "language" attribute, containing:
 *  - 'identifier': the language code,
 *  - 'title':      the "title" attribute of the group,
 *  - 'stemmers':   the stemmers of the group, as returned by getLanguageStemmers().
 *
 * @param \DOMXPath $xpath           XPath access to the document parsed.
 * @param array     $defaultStemmers Default stemmers for available languages, indexed by language code.
 *
 * @return array
 */
private function getAllStemmersOptions(\DOMXPath $xpath, $defaultStemmers = [])
{
    $stemmerOptions = [];

    $searchPath = sprintf(
        "/%s/%s/%s",
        self::ROOT_NODE_NAME,
        self::STEMMER_TYPE_ROOT_NODE,
        self::STEMMER_GROUP_TYPE_NODE
    );
    $stemmerGroupNodes = $xpath->query($searchPath);
    foreach ($stemmerGroupNodes as $stemmerGroupNode) {
        $languageCode = $stemmerGroupNode->getAttribute('language');

        // A stemmers group may exist for a language that has no default stemmer filter:
        // guard the lookup with "??" so a missing key does not raise an "Undefined array key" error,
        // then keep normalizing any falsy value to null as before.
        $defaultStemmer = ($defaultStemmers[$languageCode] ?? null) ?: null;

        $stemmerOptions[$languageCode] = [
            'identifier' => $languageCode,
            'title'      => $stemmerGroupNode->getAttribute('title'),
            'stemmers'   => $this->getLanguageStemmers($xpath, $stemmerGroupNode, $defaultStemmer),
        ];
    }

    return $stemmerOptions;
}

/**
 * Parse the stemmers declared inside a given language stemmers group.
 *
 * Each stemmer is described by its 'identifier', a 'recommended' value (the raw attribute, or false
 * when it is empty), a 'default' boolean, its 'label' and one entry per other child element
 * (JSON-decoded when possible, raw node value otherwise).
 *
 * @param \DOMXPath   $xpath          XPath access to the document parsed.
 * @param \DomNode    $rootNode       Stemmers group node for a given language.
 * @param string|null $defaultStemmer Default stemmer for the given language, if defined.
 *
 * @return array Stemmers definitions indexed by stemmer identifier.
 */
private function getLanguageStemmers(\DOMXPath $xpath, \DomNode $rootNode, $defaultStemmer = null)
{
    $definitions = [];

    foreach ($xpath->query(sprintf("./%s", self::STEMMER_TYPE_NODE), $rootNode) as $node) {
        $stemmerId   = $node->getAttribute('identifier');
        $recommended = $node->getAttribute('recommended');

        $definition = [
            'identifier'  => $stemmerId,
            'recommended' => $recommended ?: false,
            'default'     => ($stemmerId === $defaultStemmer),
        ];

        foreach ($node->childNodes as $child) {
            // Skip text nodes, comments, etc.
            if (!($child instanceof \DOMElement)) {
                continue;
            }

            $tagName  = $child->tagName;
            $rawValue = $child->nodeValue;

            // The label is always kept as plain text.
            if ($tagName === 'label') {
                $definition['label'] = $rawValue;
                continue;
            }

            // Any other child is JSON-decoded when possible, kept as raw text otherwise.
            try {
                $definition[$tagName] = $this->jsonDecoder->decode($rawValue);
            } catch (\Exception $exception) {
                $definition[$tagName] = $rawValue;
            }
        }

        $definitions[$stemmerId] = $definition;
    }

    return $definitions;
}
}
2 changes: 2 additions & 0 deletions src/module-elasticsuite-core/Index/Analysis/Config/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class Reader extends Filesystem
'/analysis/filters/filter' => ['name', 'language'],
'/analysis/tokenizers/tokenizer' => ['name', 'language'],
'/analysis/analyzers/analyzer' => ['name', 'language'],
'/analysis/stemmers/group' => ['language'],
'/analysis/stemmers/group/stemmer' => ['identifier'],
];

/**
Expand Down
27 changes: 27 additions & 0 deletions src/module-elasticsuite-core/Index/IndexSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ public function getAnalysisSettings($store)
$settings['analyzer']['reference']['filter'] = array_values($referenceFilters);
}

$settings = $this->applyCustomLanguageStemmer($settings, $store);

return $settings;
}

Expand Down Expand Up @@ -223,4 +225,29 @@ public function getDynamicIndexSettings($store)

return $settings;
}

/**
 * Replace the language of the "stemmer" filter by the custom stemmer configured for the store, if any.
 * The settings are returned untouched when the store has no custom stemmer, when they do not
 * contain a "stemmer" filter with a "language" entry, or when the configured custom stemmer is empty.
 *
 * @param array                                                 $analysisSettings Analysis settings.
 * @param integer|string|\Magento\Store\Api\Data\StoreInterface $store            Store.
 *
 * @return array
 */
protected function applyCustomLanguageStemmer($analysisSettings, $store)
{
    if (!$this->helper->hasCustomLanguageStemmer($store)) {
        return $analysisSettings;
    }

    $hasStemmerLanguage = is_array($analysisSettings)
        && array_key_exists('filter', $analysisSettings)
        && array_key_exists('stemmer', $analysisSettings['filter'])
        && array_key_exists('language', $analysisSettings['filter']['stemmer']);
    if (!$hasStemmerLanguage) {
        return $analysisSettings;
    }

    // A null or empty configured value means there is nothing to override.
    $configuredStemmer = $this->helper->getCustomLanguageStemmer($store);
    if (!empty($configuredStemmer)) {
        $analysisSettings['filter']['stemmer']['language'] = $configuredStemmer;
    }

    return $analysisSettings;
}
}
Loading
Loading