Skip to content

Commit

Permalink
Merge pull request #767 from afoucret/fix-synonyms-parsing
Browse files Browse the repository at this point in the history
Fix synonyms handling when the number of words is > 2 or the term contains a hyphen.
  • Loading branch information
afoucret authored Feb 20, 2018
2 parents 30de2ce + e470298 commit 7fc64b7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
8 changes: 7 additions & 1 deletion src/module-elasticsuite-thesaurus/Model/Index.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ class Index
/**
* @var string
*/
const WORD_DELIMITER = '-';
const WORD_DELIMITER = '_';

/**
* @var integer
*/
const MAX_SIZE = 10;

/**
* @var \Elasticsearch\Client
Expand Down Expand Up @@ -218,6 +223,7 @@ private function getSynonymRewrites($storeId, $queryText, $type)
foreach ($analysis['tokens'] as $token) {
if ($token['type'] == 'SYNONYM') {
$positionKey = sprintf('%s_%s', $token['start_offset'], $token['end_offset']);
$token['token'] = str_replace('_', ' ', $token['token']);
$synonymByPositions[$positionKey][] = $token;
}
}
Expand Down
16 changes: 12 additions & 4 deletions src/module-elasticsuite-thesaurus/Model/Indexer/IndexHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,15 @@ private function getIndexSettings($synonyms, $expansions)
];

$settings['analysis']['filter']['shingle'] = [
'type' => 'shingle',
'output_false' => true,
'token_separator' => ThesaurusIndex::WORD_DELIMITER,
'type' => 'shingle',
'output_false' => true,
'token_separator' => ThesaurusIndex::WORD_DELIMITER,
'max_shingle_size' => ThesaurusIndex::MAX_SIZE,
];

$settings['analysis']['filter']['type_filter'] = [
'type' => 'keep_types',
'types' => [ "SYNONYM" ],
];

$settings = $this->addAnalyzerSettings($settings, 'synonym', $synonyms);
Expand Down Expand Up @@ -140,6 +146,8 @@ private function addAnalyzerSettings($settings, $type, $values)
$settings['analysis']['analyzer'][$type]['filter'][] = $type;
}

$settings['analysis']['analyzer'][$type]['filter'][] = 'type_filter';

return $settings;
}

Expand All @@ -154,7 +162,7 @@ private function addAnalyzerSettings($settings, $type, $values)
private function prepareSynonymFilterData($rows)
{
$rowMaper = function ($row) {
return preg_replace('/([\w])\s(?=[\w])/', '\1-', $row);
return preg_replace('/([\w])[\s-](?=[\w])/', '\1_', $row);
};

return array_map($rowMaper, $rows);
Expand Down

0 comments on commit 7fc64b7

Please sign in to comment.