Skip to content

Commit

Permalink
Add suggestField function
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentFoulon80 committed Feb 6, 2020
1 parent 26d0395 commit c3926f1
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 12 deletions.
28 changes: 26 additions & 2 deletions Engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,30 @@ public function search($query, $filters = []){
return $this->index->search($query, $filters);
}

/**
* Deprecated alias of suggestToken(): forwards $token unchanged and returns its result.
* @param $token
* @return array
* @throws Exception
* @deprecated Use suggestToken($token) instead
*/
public function suggest($token){
return $this->suggestToken($token);
}

/**
* Suggest last word for a search
* @param $query
* @return array
* @throws Exception
*/
public function suggest($query){
public function suggestToken($query){
$terms = explode(' ', $query);
$search = array_pop($terms);
$tokens = $this->index->tokenizeQuery($search);
$suggestions = [];
foreach($tokens as $token) {
$suggestions = array_replace($suggestions, $this->index->suggest($token));
$suggestions = array_replace($suggestions, $this->index->suggestToken($token));
}
$before = implode(' ',$terms);
foreach($suggestions as &$suggest){
Expand All @@ -104,6 +115,18 @@ public function suggest($query){
return array_chunk($suggestions, 10)[0];
}

/**
* Suggest values for a field by delegating to the index's suggestField() with the same arguments.
* @param $field
* @param $value
* @param bool|string $wrapSpan if true, wrap <span> tags around the matching values.
* if it's a string, adds the string as a class
* @return array
* @throws Exception
*/
public function suggestField($field, $value, $wrapSpan = false){
return $this->index->suggestField($field, $value, $wrapSpan);
}

/**
* delete the given document ID from the index
* @param $id
Expand All @@ -126,6 +149,7 @@ private function getDefaultConfig(){
'documents_dir' => DIRECTORY_SEPARATOR.'engine'.DIRECTORY_SEPARATOR.'documents',
'cache_dir' => DIRECTORY_SEPARATOR.'engine'.DIRECTORY_SEPARATOR.'cache',
'fuzzy_cost' => 1,
'approximate_limit' => 5,
'connex' => [
'threshold' => 0.9,
'min' => 3,
Expand Down
84 changes: 77 additions & 7 deletions Services/Index.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ class Index
*/
private $updatingId;

/**
* @var int $approximateCount
*/
private $approximateCount;



/**
* Index constructor.
Expand Down Expand Up @@ -268,6 +274,7 @@ public function search($query, $filters = [])
$results = [];
if(!empty($tokens)){
foreach($tokens as $token){
$this->approximateCount = 0;
$this->computeScore($results, $this->find($token));
}
} else {
Expand Down Expand Up @@ -301,6 +308,7 @@ public function search($query, $filters = [])
$tokens = $this->tokenizeQuery($query->getValue());
if(!empty($tokens)){
foreach($tokens as $token){
$this->approximateCount = 0;
$this->computeScore($regularResult, $this->find($token));
}
}
Expand Down Expand Up @@ -595,14 +603,71 @@ private function find($token){
}

/**
* Deprecated alias of suggestToken(): forwards both arguments unchanged and returns its result.
* @param $token
* @param bool $providePonderations
* @return array
* @throws Exception
* @deprecated Use suggestToken($token, $providePonderations) instead
*/
public function suggest($token, $providePonderations = false){
return $this->suggestToken($token, $providePonderations);
}

/**
* Suggest stored values of $field that contain $value (case-insensitive substring match).
*
* Candidates are the keys of the 'exact_'.$field index file. Both the candidates and
* $value are lowercased before comparison, so the returned suggestions are lowercase.
* Suggestions are ordered by the position of the first match, earliest first.
* The result is stored through setCache(); an empty cached entry is treated as a miss.
*
* @param $field
* @param $value
* @param bool|string $wrapSpan if true, wrap <span> tags around the matching values.
* if it's a string, adds the string as a class
* @return array list of matching values; empty when the exact index file cannot be opened or nothing matches
* @throws Exception
*/
public function suggestField($field, $value, $wrapSpan = false)
{
    // Lowercase BEFORE building the cache key so that the lookup and the store
    // below use the same key (otherwise mixed-case queries never hit the cache).
    $value = strtolower($value);
    $cacheKey = 'suggest_'.md5($field.'_'.$value.'_'.$wrapSpan);

    $cached = $this->getCache($cacheKey);
    if(!empty($cached)){
        return $cached;
    }

    $exactFile = $this->index->open('exact_'.$field);
    if($exactFile === null){
        return [];
    }

    // The opening tag does not depend on the candidate, build it once.
    $openTag = '';
    if($wrapSpan !== false){
        $openTag = '<span';
        if(is_string($wrapSpan)){
            $openTag .= ' class="'.$wrapSpan.'"';
        }
        $openTag .= '>';
    }

    $matching = [];
    foreach(array_keys($exactFile->getContent()) as $exactValue){
        $exactValue = strtolower($exactValue);
        $strPos = strpos($exactValue, $value);
        if($strPos === false){
            continue;
        }
        if($wrapSpan !== false){
            $exactValue = str_replace($value, $openTag.$value.'</span>', $exactValue);
        }
        // keyed by suggestion, valued by match position so asort() ranks earliest matches first
        $matching[$exactValue] = $strPos;
    }

    asort($matching);
    $matching = array_keys($matching);
    $this->setCache($cacheKey, $matching);
    return $matching;
}

/**
* @param $token
* @param bool $providePonderations
* @return array
* @throws Exception
*/
public function suggestToken($token, $providePonderations = false)
{
if(empty($token)) return [];
$cached = $this->getCache('suggestToken_'.md5($token.'_'.$providePonderations));
if(!empty($cached)){
return $cached;
}
$all = $this->index->open('all');
$tokens = array_keys($all->getContent());
$matching = [];
Expand All @@ -613,10 +678,11 @@ public function suggest($token, $providePonderations = false){
}
}
asort($matching);
if($providePonderations){
return $matching;
if(!$providePonderations){
$matching = array_keys($matching);
}
return array_keys($matching);
$this->setCache('suggestToken_'.md5($token.'_'.$providePonderations), $matching);
return $matching;
}

/**
Expand All @@ -629,9 +695,13 @@ public function suggest($token, $providePonderations = false){
private function fuzzyFind($token)
{
if(empty($token) || $this->config['fuzzy_cost'] == 0) return [];
$matching = $this->suggest($token, true);
$matching = $this->suggestToken($token, true);
if(empty($matching)){
$matching = $this->approximate($token, $this->config['fuzzy_cost']);
if($this->config['approximate_limit'] < 0 || $this->approximateCount < $this->config['approximate_limit']) {
// approximate_limit is here for preventing the usage of this CPU intensive function
$matching = $this->approximate($token, $this->config['fuzzy_cost']);
$this->approximateCount++;
}
}
$found = [];
if(!empty($matching)){
Expand All @@ -658,7 +728,7 @@ private function fuzzyFind($token)
* @throws Exception
*/
private function approximate($term, $cost, $positions = []){
$cached = $this->getCache('approx_'.$term);
$cached = $this->getCache('approx_'.base64_encode($term));
if(!empty($cached)){
return $cached;
}
Expand Down
7 changes: 4 additions & 3 deletions templates/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ function displayConfigArray($array, $prefix = ''){
'documents_dir' => 'Subdirectory of var_dir where documents will be stored',
'cache_dir' => 'Subdirectory of var_dir where the cache will be stored',
'fuzzy_cost' => 'Define how many iterations of approximation will be ran on any non-found tokens. Greater is more CPU-intensive and too much won\'t help find accurately',
'approximate_limit' => '(-1 = infinite) Define how many times the approximate function will be ran, per token. This function is CPU intensive and we don\'t want it to run in loop when the user typed nonsense',
'connex.threshold' => '(percentage 0-1) Every document with a score that matches this threshold will be included to the connex search',
'connex.min' => 'Minimum number of documents that will be included into the connex search',
'connex.max' => 'Maximum number of documents that will be included into the connex search',
'connex.limitToken' => 'Maximum of tokens that will be retained in the connex search',
'connex.min' => 'Minimum number of documents that will be internally included into the connex search',
'connex.max' => 'Maximum number of documents that will be internally included into the connex search',
'connex.limitToken' => 'Maximum number of tokens that will be retained in the connex search',
'connex.limitDocs' => 'Maximum number of documents that\'ll be returned from the connex search'
];
foreach($array as $name => $value){
Expand Down

0 comments on commit c3926f1

Please sign in to comment.