Skip to content

Commit

Permalink
[Core] Allow changing language stemmer when possible
Browse files Browse the repository at this point in the history
  • Loading branch information
rbayet committed Sep 2, 2024
1 parent dc678bd commit 9031672
Show file tree
Hide file tree
Showing 13 changed files with 612 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php
/**
* DISCLAIMER
*
* Do not edit or add to this file if you wish to upgrade this module to newer versions in the future.
*
* @category Smile
* @package Smile\ElasticsuiteCore
* @author Richard BAYET <richard.bayet@smile.fr>
* @copyright 2024 Smile
* @license Open Software License ("OSL") v. 3.0
*/

namespace Smile\ElasticsuiteCore\Block\Adminhtml\System\Config;

use Magento\Config\Block\System\Config\Form\Field;
use Magento\Framework\Data\Form\Element\AbstractElement;

/**
 * Comment-only row for the system configuration form.
 * Outputs the label and the comment of the element in a single full-width cell, without the usual
 * field label and scope input columns.
 * This makes it possible to show a notice at the default scope for fields only available at the store view level.
 *
 * @category Smile
 * @package  Smile\ElasticsuiteCore
 */
class Comment extends Field
{
    /**
     * Build the complete table row markup for the given form element.
     *
     * @param AbstractElement $element Form element.
     *
     * @return string
     */
    public function render(AbstractElement $element)
    {
        // Label first, then comment: both are output as provided by the element.
        $titleBlock   = '<div class="elasticsuite config-comment-title">' . $element->getLabel() . '</div>';
        $contentBlock = '<div class="elasticsuite config-comment-content">' . $element->getComment() . '</div>';

        return $this->decorateRowHtml($element, $titleBlock . $contentBlock);
    }

    /**
     * Wrap the given markup into a table row made of a single cell spanning the three columns.
     *
     * @param AbstractElement $element Form element, its HTML id is used to build the row id.
     * @param string          $html    Markup to put inside the row.
     *
     * @return string
     */
    private function decorateRowHtml(AbstractElement $element, $html)
    {
        $rowStart = '<tr id="row_' . $element->getHtmlId() . '"><td colspan="3">';
        $rowEnd   = '</td></tr>';

        return $rowStart . '<div class="elasticsuite config-comment">' . $html . '</div>' . $rowEnd;
    }
}
39 changes: 39 additions & 0 deletions src/module-elasticsuite-core/Helper/IndexSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ class IndexSettings extends AbstractConfiguration
*/
const REFERENCE_ANALYZER_CONFIG_XML_PREFIX = 'reference_analyzer';

/**
* Config path, relative to the analysis config prefix, of the "use the default language stemmer" flag.
*
* @var string
*/
const STEMMER_USE_DEFAULT_CONFIG_XML_PATH = 'stemmer/use_default';

/**
* Config path, relative to the analysis config prefix, of the custom language stemmer value.
*
* @var string
*/
const STEMMER_CUSTOM_STEMMER_CONFIG_XML_PATH = 'stemmer/custom';

/**
* @var string
*/
Expand Down Expand Up @@ -406,6 +416,35 @@ public function getReferenceAnalyzerConfigFlag($configFlag, $store)
return $this->scopeConfig->isSetFlag($path, ScopeInterface::SCOPE_STORE, $store);
}


/**
 * Tell whether the given store is configured to use a non-default language stemmer,
 * i.e. whether the "use default stemmer" flag is not set for that store.
 *
 * @param integer|string|StoreInterface $store Store.
 *
 * @return bool
 */
public function hasCustomLanguageStemmer($store)
{
    $useDefaultPath = sprintf(
        '%s/%s',
        self::ANALYSIS_CONFIG_XML_PREFIX,
        self::STEMMER_USE_DEFAULT_CONFIG_XML_PATH
    );
    $usesDefault = $this->scopeConfig->isSetFlag($useDefaultPath, ScopeInterface::SCOPE_STORE, $store);

    // Strict comparison on purpose: only an explicit "false" flag means a custom stemmer is in use.
    return ($usesDefault === false);
}

/**
 * Read the custom language stemmer configured for the given store.
 * The raw configuration value is returned untouched.
 *
 * @param integer|string|StoreInterface $store Store.
 *
 * @return mixed
 */
public function getCustomLanguageStemmer($store)
{
    $customStemmerPath = sprintf(
        '%s/%s',
        self::ANALYSIS_CONFIG_XML_PREFIX,
        self::STEMMER_CUSTOM_STEMMER_CONFIG_XML_PATH
    );

    return $this->scopeConfig->getValue($customStemmerPath, ScopeInterface::SCOPE_STORE, $store);
}

/**
* Retrieve a configuration value by its key.
*
Expand Down
130 changes: 130 additions & 0 deletions src/module-elasticsuite-core/Index/Analysis/Config/Converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class Converter implements \Magento\Framework\Config\ConverterInterface
const ANALYZER_TYPE_NODE = 'analyzer';
const NORMALIZER_TYPE_ROOT_NODE = 'normalizers';
const NORMALIZER_TYPE_NODE = 'normalizer';
// Name of the node, directly under the root node, holding all the stemmer groups.
const STEMMER_TYPE_ROOT_NODE = 'stemmers';
// Name of a stemmer group node (one group per language, see its "language" and "title" attributes).
const STEMMER_GROUP_TYPE_NODE = 'group';
// Name of the node describing a single stemmer inside a stemmer group.
const STEMMER_TYPE_NODE = 'stemmer';
const LANGUAGE_DEFAULT = 'default';

/**
Expand Down Expand Up @@ -71,6 +74,11 @@ public function convert($source)
$configuration[$language] = $this->getLanguageConfiguration($xpath, $language, $defaultConfig);
}

$configuration['default'][self::STEMMER_TYPE_ROOT_NODE] = $this->getAllStemmersOptions(
$xpath,
$this->getAllDefaultLanguageStemmers($xpath)
);

return $configuration;
}

Expand Down Expand Up @@ -339,4 +347,126 @@ private function getFiltersByRef(\DOMXPath $xpath, \DomNode $rootNode, $searchPa

return $filters;
}

/**
 * Collect the default stemmer of every language, as currently defined in the configuration.
 * Relies on the fact that the filter used is of type "stemmer" and is named "stemmer".
 *
 * @param \DOMXPath $xpath XPath access to the document parsed.
 *
 * @return array Default stemmers, indexed by the "language" attribute of their filter node.
 */
private function getAllDefaultLanguageStemmers(\DOMXPath $xpath)
{
    $stemmersByLanguage = [];

    $query = sprintf(
        "/%s/%s/%s[%s]",
        self::ROOT_NODE_NAME,
        self::FILTER_TYPE_ROOT_NODE,
        self::FILTER_TYPE_NODE,
        "@type='stemmer' and @name='stemmer'"
    );

    foreach ($xpath->query($query) as $filterNode) {
        // The stemmer is the value of the first <language> child element of the filter node.
        $stemmerValue = false;
        foreach ($filterNode->childNodes as $node) {
            if (($node instanceof \DOMElement) && ($node->tagName === 'language')) {
                $stemmerValue = $node->nodeValue;
                break;
            }
        }

        // Filters without a usable <language> child are ignored.
        if (empty($stemmerValue)) {
            continue;
        }

        $stemmersByLanguage[$filterNode->getAttribute('language')] = $stemmerValue;
    }

    return $stemmersByLanguage;
}

/**
 * Parse all stemmers options available for languages that support multiple stemmers.
 *
 * Each stemmer group node found under the stemmers root node produces one entry, indexed by the
 * "language" attribute of the group and made of:
 * - 'identifier': the language code of the group,
 * - 'title': the "title" attribute of the group,
 * - 'stemmers': the stemmers declared in the group (see getLanguageStemmers).
 *
 * @param \DOMXPath $xpath           XPath access to the document parsed.
 * @param array     $defaultStemmers Default stemmers for available languages, indexed by language code.
 *
 * @return array
 */
private function getAllStemmersOptions(\DOMXPath $xpath, $defaultStemmers = [])
{
    $stemmerOptions = [];

    $searchPath = sprintf(
        "/%s/%s/%s",
        self::ROOT_NODE_NAME,
        self::STEMMER_TYPE_ROOT_NODE,
        self::STEMMER_GROUP_TYPE_NODE
    );
    $stemmerGroupNodes = $xpath->query($searchPath);
    foreach ($stemmerGroupNodes as $stemmerGroupNode) {
        $languageCode = $stemmerGroupNode->getAttribute('language');

        // A group may exist for a language that has no default stemmer filter: the null coalescing
        // operator avoids the "Undefined array key" warning that reading the missing key would raise.
        $defaultStemmer = $defaultStemmers[$languageCode] ?? null;

        $stemmerOptions[$languageCode] = [
            'identifier' => $languageCode,
            'title'      => $stemmerGroupNode->getAttribute('title'),
            // An empty default is normalized to null, as no stemmer identifier can match it anyway.
            'stemmers'   => $this->getLanguageStemmers($xpath, $stemmerGroupNode, $defaultStemmer ?: null),
        ];
    }

    return $stemmerOptions;
}

/**
 * Parse the stemmers available in a given language stemmers group.
 *
 * Each stemmer is described by its 'identifier', its 'recommended' status (raw attribute value when set,
 * false otherwise), whether it is the 'default' one, plus one entry per child element of the stemmer node.
 *
 * @param \DOMXPath   $xpath          XPath access to the document parsed.
 * @param \DomNode    $rootNode       Stemmers group node for a given language.
 * @param string|null $defaultStemmer Default stemmer for the given language, if defined.
 *
 * @return array Stemmers indexed by identifier.
 */
private function getLanguageStemmers(\DOMXPath $xpath, \DomNode $rootNode, $defaultStemmer = null)
{
    $languageStemmers = [];

    foreach ($xpath->query(sprintf("./%s", self::STEMMER_TYPE_NODE), $rootNode) as $node) {
        $stemmerId   = $node->getAttribute('identifier');
        $description = [
            'identifier'  => $stemmerId,
            'recommended' => $node->getAttribute('recommended') ?: false,
            'default'     => ($stemmerId === $defaultStemmer),
        ];

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMElement)) {
                continue;
            }

            $property = $child->tagName;
            if ($property === 'label') {
                // The label is always kept as plain text.
                $description['label'] = $child->nodeValue;
            } else {
                // Other properties are JSON decoded when possible, kept as raw text otherwise.
                try {
                    $description[$property] = $this->jsonDecoder->decode($child->nodeValue);
                } catch (\Exception $exception) {
                    $description[$property] = $child->nodeValue;
                }
            }
        }

        $languageStemmers[$stemmerId] = $description;
    }

    return $languageStemmers;
}
}
2 changes: 2 additions & 0 deletions src/module-elasticsuite-core/Index/Analysis/Config/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class Reader extends Filesystem
'/analysis/filters/filter' => ['name', 'language'],
'/analysis/tokenizers/tokenizer' => ['name', 'language'],
'/analysis/analyzers/analyzer' => ['name', 'language'],
'/analysis/stemmers/group' => ['language'],
'/analysis/stemmers/group/stemmer' => ['identifier'],
];

/**
Expand Down
27 changes: 27 additions & 0 deletions src/module-elasticsuite-core/Index/IndexSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ public function getAnalysisSettings($store)
$settings['analyzer']['reference']['filter'] = array_values($referenceFilters);
}

$settings = $this->applyCustomLanguageStemmer($settings, $store);

return $settings;
}

Expand Down Expand Up @@ -223,4 +225,29 @@ public function getDynamicIndexSettings($store)

return $settings;
}

/**
 * Replace the language of the "stemmer" filter by the custom stemmer configured for the store, if any.
 * The settings are returned unchanged when the store uses the default stemmer, when they do not contain
 * a "stemmer" filter with a "language" entry, or when no custom stemmer value is configured.
 *
 * @param array                                                 $analysisSettings Analysis settings.
 * @param integer|string|\Magento\Store\Api\Data\StoreInterface $store            Store.
 *
 * @return array
 */
protected function applyCustomLanguageStemmer($analysisSettings, $store)
{
    if (!$this->helper->hasCustomLanguageStemmer($store) || !is_array($analysisSettings)) {
        return $analysisSettings;
    }

    $hasStemmerLanguage = array_key_exists('filter', $analysisSettings)
        && array_key_exists('stemmer', $analysisSettings['filter'])
        && array_key_exists('language', $analysisSettings['filter']['stemmer']);
    if (!$hasStemmerLanguage) {
        return $analysisSettings;
    }

    // Only a non-empty configured value overrides the stemmer language.
    $configuredStemmer = $this->helper->getCustomLanguageStemmer($store);
    if (empty($configuredStemmer)) {
        return $analysisSettings;
    }

    $analysisSettings['filter']['stemmer']['language'] = $configuredStemmer;

    return $analysisSettings;
}
}
Loading

0 comments on commit 9031672

Please sign in to comment.