Skip to content

Commit

Permalink
tweak scoring to prevent match scattering
Browse files Browse the repository at this point in the history
  • Loading branch information
bpasero committed Oct 20, 2017
1 parent 5b4c9d2 commit 9d40f0c
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 123 deletions.
280 changes: 159 additions & 121 deletions src/vs/base/parts/quickopen/common/quickOpenScorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,180 +14,218 @@ import { stripWildcards } from 'vs/base/common/strings';
export type Score = [number /* score */, number[] /* match positions */];
export type ScorerCache = { [key: string]: IItemScore };

const NO_SCORE: Score = [0, []];
const NO_MATCH = 0;
const NO_SCORE: Score = [NO_MATCH, []];

export function _doScore(target: string, query: string, queryLower: string, fuzzy: boolean): Score {
// const DEBUG = false;
// const DEBUG_MATRIX = false;

export function score(target: string, query: string, queryLower: string, fuzzy: boolean): Score {
if (!target || !query) {
return NO_SCORE; // return early if target or query are undefined
}

if (target.length < query.length) {
const targetLength = target.length;
const queryLength = query.length;

if (targetLength < queryLength) {
return NO_SCORE; // impossible for query to be contained in target
}

// console.group(`Target: ${target}, Query: ${query}`);
// if (DEBUG) {
// console.group(`Target: ${target}, Query: ${query}`);
// }

const queryLen = query.length;
const targetLower = target.toLowerCase();

let res = NO_SCORE;

// When not searching fuzzy, we require the query to be contained fully
// in the target string. We set the offset to search from to that location.
// in the target string contiguously.
if (!fuzzy) {
const indexOfQueryInTarget = targetLower.indexOf(queryLower);
if (indexOfQueryInTarget === -1) {
// console.log(`Characters not matching consecutively ${queryLower} within ${targetLower}`);
// if (DEBUG) {
// console.log(`Characters not matching consecutively ${queryLower} within ${targetLower}`);
// }

return NO_SCORE;
}

res = _doScoreFromOffset(target, query, targetLower, queryLower, queryLen, indexOfQueryInTarget);
}

// When searching fuzzy we run the scorer for each location of the first query
// character so that we can produce better results in case the pattern matches
// multiple times on the target (prevent scattering of matching positions).
// When searching fuzzy, we require the query to be contained fully
// in the target string as separate substrings
else {
const queryFirstCharacter = queryLower[0];

let offset = 0;
while ((offset = targetLower.indexOf(queryFirstCharacter, offset)) !== -1) {
const scoreFromOffset = _doScoreFromOffset(target, query, targetLower, queryLower, queryLen, offset);
if (isBetterScore(res, scoreFromOffset)) {
res = scoreFromOffset;
let targetOffset = 0;
for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
targetOffset = targetLower.indexOf(queryLower[queryIndex], targetOffset);
if (targetOffset === -1) {
return NO_SCORE;
}

offset++;
}
}

// console.log(`%cFinal Score: ${score}`, 'font-weight: bold');
// console.groupEnd();
const res = doScore(query, queryLower, queryLength, target, targetLower, targetLength);

// if (DEBUG) {
// console.log(`%cFinal Score: ${res[0]}`, 'font-weight: bold');
// console.groupEnd();
// }

return res;
}

function isBetterScore(score: Score, candidate: Score): boolean {
if (candidate[0] > score[0]) {
return true; // candidate has higher score
}
function doScore(query: string, queryLower: string, queryLength: number, target: string, targetLower: string, targetLength: number): [number, number[]] {
const scores = [];
const matches = [];

//
// Build Scorer Matrix
// The matrix is composed of query q and target t. For each index we score
// q[i] with t[i] and compare that with the previous score. If the score is
// equal or larger, we keep the match. In addition to the score, we also keep
// the length of the consecutive matches to use as boost for the score.
//
// t a r g e t
// q
// u
// e
// r
// y
//
for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
for (let targetIndex = 0; targetIndex < targetLength; targetIndex++) {
const currentIndex = queryIndex * targetLength + targetIndex;
const leftIndex = currentIndex - 1;
const diagIndex = (queryIndex - 1) * targetLength + targetIndex - 1;

const leftScore = targetIndex > 0 ? scores[leftIndex] : 0;
const diagScore = queryIndex > 0 && targetIndex > 0 ? scores[diagIndex] : 0;

const matchesSequenceLength = queryIndex > 0 && targetIndex > 0 ? matches[diagIndex] : 0;

const score = computeCharScore(query, queryLower, queryIndex, target, targetLower, targetIndex, matchesSequenceLength);

// We have a score and it's equal to or larger than the left score
// Match: sequence continues growing from previous diag value
// Score: increases by diag score value
if (score && diagScore + score >= leftScore) {
matches[currentIndex] = matchesSequenceLength + 1;
scores[currentIndex] = diagScore + score;
}

if (score[0] > candidate[0]) {
return false; // candidate has lower score
// We either have no score or the score is lower than the left score
// Match: reset to 0
// Score: pick up from left hand side
else {
matches[currentIndex] = NO_MATCH;
scores[currentIndex] = leftScore;
}
}
}

// Score is the same, check by match compactness
const matchStart = score[1][0];
const matchEnd = score[1][score[1].length - 1];
const matchLength = matchEnd - matchStart;

const candidateMatchStart = candidate[1][0];
const candidateMatchEnd = candidate[1][candidate[1].length - 1];
const candidateMatchLength = candidateMatchEnd - candidateMatchStart;

if (candidateMatchLength < matchLength) {
return true; // candidate has more compact matches
// Restore Positions (starting from bottom right of matrix)
const positions = [];
let queryIndex = queryLength - 1;
let targetIndex = targetLength - 1;
while (queryIndex >= 0 && targetIndex >= 0) {
const currentIndex = queryIndex * targetLength + targetIndex;
const match = matches[currentIndex];
if (match === NO_MATCH) {
targetIndex--; // go left
} else {
positions.push(targetIndex);

// go up and left
queryIndex--;
targetIndex--;
}
}

return false;
// Print matrix
// if (DEBUG_MATRIX) {
// printMatrix(query, target, matches, scores);
// }

return [scores[queryLength * targetLength - 1], positions.reverse()];
}

// Based on material from:
/*!
BEGIN THIRD PARTY
*/
/*!
* string_score.js: String Scoring Algorithm 0.1.22
*
* http://joshaven.com/string_score
* https://github.com/joshaven/string_score
*
* Copyright (C) 2009-2014 Joshaven Potter <yourtech@gmail.com>
* Special thanks to all of the contributors listed here https://github.com/joshaven/string_score
* MIT License: http://opensource.org/licenses/MIT
*
* Date: Tue Mar 1 2011
* Updated: Tue Mar 10 2015
*/
function _doScoreFromOffset(target: string, query: string, targetLower: string, queryLower: string, queryLen: number, offset: number): Score {
const matchingPositions: number[] = [];

let targetIndex = offset;
let queryIndex = 0;
function computeCharScore(query: string, queryLower: string, queryIndex: number, target: string, targetLower: string, targetIndex: number, matchesSequenceLength: number): number {
let score = 0;
while (queryIndex < queryLen) {

// Check for query character being contained in target
const indexOfQueryInTarget = targetLower.indexOf(queryLower[queryIndex], targetIndex);

if (indexOfQueryInTarget < 0) {
// console.log(`Character not part of target ${query[index]}`);

score = 0;
break;
}

// Fill into positions array
matchingPositions.push(indexOfQueryInTarget);

// Character match bonus
score += 1;

// console.groupCollapsed(`%cCharacter match bonus: +1 (char: ${query[index]} at index ${indexOf}, total score: ${score})`, 'font-weight: normal');
if (queryLower[queryIndex] !== targetLower[targetIndex]) {
return score; // no match of characters
}

// Consecutive match bonus
if (targetIndex === indexOfQueryInTarget && queryIndex > 0) {
score += 5;
// Character match bonus
score += 1;

// console.log('Consecutive match bonus: +5');
}
// if (DEBUG) {
// console.groupCollapsed(`%cCharacter match bonus: +1 (char: ${queryLower[queryIndex]} at index ${targetIndex}, total score: ${score})`, 'font-weight: normal');
// }

// Same case bonus
if (target[indexOfQueryInTarget] === query[queryIndex]) {
score += 1;
// Consecutive match bonus
if (matchesSequenceLength > 0) {
score += (matchesSequenceLength * 5);

// console.log('Same case bonus: +1');
}
// if (DEBUG) {
// console.log('Consecutive match bonus: ' + (matchesSequenceLength * 5));
// }
}

// Start of word bonus
if (indexOfQueryInTarget === 0) {
score += 8;
// Same case bonus
if (query[queryIndex] === target[targetIndex]) {
score += 1;

// console.log('Start of word bonus: +8');
}
// if (DEBUG) {
// console.log('Same case bonus: +1');
// }
}

// After separator bonus
else if (isSeparatorAtPos(target, indexOfQueryInTarget - 1)) {
score += 7;
// Start of word bonus
if (targetIndex === 0) {
score += 8;

// console.log('After separator bonus: +7');
}
// if (DEBUG) {
// console.log('Start of word bonus: +8');
// }
}

// Inside word upper case bonus
else if (isUpper(target.charCodeAt(indexOfQueryInTarget))) {
score += 1;
// After separator bonus
else if (isSeparatorAtPos(target, targetIndex - 1)) {
score += 4;

// console.log('Inside word upper case bonus: +1');
}
// if (DEBUG) {
// console.log('After separator bonus: +4');
// }
}

// console.groupEnd();
// Inside word upper case bonus
else if (isUpper(target.charCodeAt(targetIndex))) {
score += 1;

targetIndex = indexOfQueryInTarget + 1;
queryIndex++;
// if (DEBUG) {
// console.log('Inside word upper case bonus: +1');
// }
}

const res: Score = (score > 0) ? [score, matchingPositions] : NO_SCORE;

// console.log(`%cFinal Score: ${score}`, 'font-weight: bold');
// console.groupEnd();
// if (DEBUG) {
// console.groupEnd();
// }

return res;
return score;
}

/*!
END THIRD PARTY
*/
// function printMatrix(query: string, target: string, matches: number[], scores: number[]): void {
// console.log('\t' + target.split('').join('\t'));
// for (let queryIndex = 0; queryIndex < query.length; queryIndex++) {
// let line = query[queryIndex] + '\t';
// for (let targetIndex = 0; targetIndex < target.length; targetIndex++) {
// const currentIndex = queryIndex * target.length + targetIndex;
// line = line + 'M' + matches[currentIndex] + '/' + 'S' + scores[currentIndex] + '\t';
// }

// console.log(line);
// }
// }

/**
* Scoring on structural items that have a label and optional description.
Expand Down Expand Up @@ -315,7 +353,7 @@ function doScoreItem<T>(label: string, description: string, path: string, query:
}

// 4.) prefer scores on the label if any
const [labelScore, labelPositions] = _doScore(label, query.value, query.lowercase, fuzzy);
const [labelScore, labelPositions] = score(label, query.value, query.lowercase, fuzzy);
if (labelScore) {
return { score: labelScore + LABEL_SCORE_THRESHOLD, labelMatch: createMatches(labelPositions) };
}
Expand All @@ -331,7 +369,7 @@ function doScoreItem<T>(label: string, description: string, path: string, query:
const descriptionPrefixLength = descriptionPrefix.length;
const descriptionAndLabel = `${descriptionPrefix}${label}`;

const [labelDescriptionScore, labelDescriptionPositions] = _doScore(descriptionAndLabel, query.value, query.lowercase, fuzzy);
const [labelDescriptionScore, labelDescriptionPositions] = score(descriptionAndLabel, query.value, query.lowercase, fuzzy);
if (labelDescriptionScore) {
const labelDescriptionMatches = createMatches(labelDescriptionPositions);
const labelMatch: IMatch[] = [];
Expand Down
Loading

0 comments on commit 9d40f0c

Please sign in to comment.