This repository has been archived by the owner on Sep 6, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7.6k
/
StringMatch.js
997 lines (879 loc) · 42.6 KB
/
StringMatch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
/*
* Copyright (c) 2013 - present Adobe Systems Incorporated. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*/
/*unittests: StringMatch */
define(function (require, exports, module) {
"use strict";
var _ = require("thirdparty/lodash");
/*
* Performs matching that is useful for QuickOpen and similar searches.
*/
/** Object representing a search result with associated metadata (added as extra ad hoc fields) */
function SearchResult(label) {
this.label = label;
}
/*
* Identifies the "special" characters in the given string.
* Special characters for matching purposes are:
*
* * the first character
* * "/" and the character following the "/"
* * "_", "." and "-" and the character following it
* * an uppercase character that follows a lowercase one (think camelCase)
*
* The returned object contains an array called "specials". This array is
* a list of indexes into the original string where all of the special
* characters are. It also has a property "lastSegmentSpecialsIndex" which
* is an index into the specials array that denotes which location is the
* beginning of the last path segment. (This is used to allow scanning of
* the last segment's specials separately.)
*
* @param {string} input string to break apart (e.g. filename that is being searched)
* @return {{specials:Array.<number>, lastSegmentSpecialsIndex:number}}
*/
function findSpecialCharacters(str) {
var i, c;
// the beginning of the string is always special
var specials = [0];
// lastSegmentSpecialsIndex starts off with the assumption that
// there are no segments
var lastSegmentSpecialsIndex = 0;
// used to track down the camelCase changeovers
var lastWasLowerCase = false;
for (i = 0; i < str.length; i++) {
c = str[i];
if (c === "/") {
// new segment means this character and the next are special
specials.push(i++);
specials.push(i);
lastSegmentSpecialsIndex = specials.length - 1;
lastWasLowerCase = false;
} else if (c === "." || c === "-" || c === "_") {
// _, . and - are separators so they are
// special and so is the next character
specials.push(i);
if (str[i + 1] !== "/") {
// if the next key is a slash, handle it separately
// see #10871
specials.push(++i);
}
lastWasLowerCase = false;
} else if (c.toUpperCase() === c) {
// this is the check for camelCase changeovers
if (lastWasLowerCase) {
specials.push(i);
}
lastWasLowerCase = false;
} else {
lastWasLowerCase = true;
}
}
return {
specials: specials,
lastSegmentSpecialsIndex: lastSegmentSpecialsIndex
};
}
// states used during the scanning of the string
var SPECIALS_MATCH = 0;
var ANY_MATCH = 1;
// Scores can be hard to make sense of. The DEBUG_SCORES flag
// provides a way to peek into the parts that made up a score.
// This flag is used for manual debugging and in the unit tests only.
var DEBUG_SCORES = false;
function _setDebugScores(ds) {
DEBUG_SCORES = ds;
}
// Constants for scoring
var SPECIAL_POINTS = 40;
var MATCH_POINTS = 10;
var UPPER_CASE_MATCH = 100;
var CONSECUTIVE_MATCHES_POINTS = 8;
var BEGINNING_OF_NAME_POINTS = 13;
var LAST_SEGMENT_BOOST = 1;
var DEDUCTION_FOR_LENGTH = 0.2;
var NOT_STARTING_ON_SPECIAL_PENALTY = 25;
// Used in match lists to designate matches of "special" characters (see
// findSpecialCharacters above
function SpecialMatch(index, upper) {
this.index = index;
if (upper) {
this.upper = upper;
}
}
// Used in match lists to designate any matched characters that are not special
function NormalMatch(index, upper) {
this.index = index;
if (upper) {
this.upper = upper;
}
}
/*
* Finds the best matches between the query and the string. The query is
* compared with str (usually a lower case string with a lower case
* query).
*
* Generally speaking, this function tries to find "special" characters
* (see findSpecialCharacters above) first. Failing that, it starts scanning
* the "normal" characters. Sometimes, it will find a special character that matches
* but then not be able to match the rest of the query. In cases like that, the
* search will backtrack and try to find matches for the whole query earlier in the
* string.
*
* A contrived example will help illustrate how the searching and backtracking works. It's a bit long,
* but it illustrates different pieces of the algorithm which can be tricky. Let's say that we're
* searching the string "AzzBzzCzdzezzDgxgEF" for "abcdex".
*
* To start with, it will match "abcde" from the query to "A B C D E" in the string (the spaces
* represent gaps in the matched part of the string), because those are all "special characters".
* However, the "x" in the query doesn't match the "F" which is the only character left in the
* string.
*
* Backtracking kicks in. The "E" is pulled off of the match list.
* deadBranches[4] is set to the "g" before the "E". This means that for the 5th
* query character (the "e") we know that we don't have a match beyond that point in the string.
*
* To resume searching, the backtrack function looks at the previous match (the "D") and starts
* searching in character-by-character (ANY_MATCH) mode right after that. It fails to find an
* "e" before it gets to deadBranches[4], so it has to backtrack again.
*
* This time, the "D" is pulled off the match list.
* deadBranches[3] is set to the "z" before the "D", because we know that for the "dex" part of the
* query, we can't make it work past the "D". We'll resume searching with the "z" after the "C".
*
* Doing an ANY_MATCH search, we find the "d". We then start searching specials for "e", but we
* stop before we get to "E" because deadBranches[4] tells us that's a dead end. So, we switch
* to ANY_MATCH and find the "e".
*
* Finally, we search for the "x". We don't find a special that matches, so we start an ANY_MATCH
* search. Then we find the "x", and we have a successful match.
*
* Here are some notes on how the algorithm works:
*
* * We only backtrack() when we're exhausted both special AND normal forward searches past that point,
* for the query remainder we currently have. For a different query remainder, we may well get further
* along - hence deadBranches[] being dependent on queryCounter; but in order to get a different query
* remainder, we must give up one or more current matches by backtracking.
*
* * Normal "any char" forward search is a superset of special matching mode -- anything that would have
* been matched in special mode *could* also be matched by normal mode. In practice, however,
* any special characters that could have matched would be picked up first by the specials matching
* code.
*
* * backtrack() always goes at least as far back as str[deadBranches[queryCounter]-1] before allowing
* forward searching to resume
*
* * When `deadBranches[queryCounter] = strCounter` it means if we're still trying to match
* `queryLower[queryCounter]` and we get to `str[strCounter]`, there's no way we can match the
* remainer of `queryLower` with the remainder of `str` -- either using specials-only or
* full any-char matching.
*
* * We know this because deadBranches[] is set in backtrack(), and we don't get to backtrack() unless
* either:
* 1. We've already exhausted both special AND normal forward searches past that point
* (i.e. backtrack() due to `strCounter >= str.length`, yet `queryCounter < query.length`)
* 2. We stopped searching further forward due to a previously set deadBranches[] value
* (i.e. backtrack() due to `strCounter > deadBranches[queryCounter]`, yet
* `queryCounter < query.length`)
*
* @param {string} query the search string (generally lower cased)
* @param {string} str the string to compare with (generally lower cased)
* @param {string} originalQuery the "non-normalized" query string (used to detect case match priority)
* @param {string} originalStr the "non-normalized" string to compare with (used to detect case match priority)
* @param {Array} specials list of special indexes in str (from findSpecialCharacters)
* @param {int} startingSpecial index into specials array to start scanning with
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible
*/
function _generateMatchList(query, str, originalQuery, originalStr, specials, startingSpecial) {
var result = [];
// used to keep track of which special character we're testing now
var specialsCounter = startingSpecial;
// strCounter and queryCounter are the indexes used for pulling characters
// off of the str/compareLower and query.
var strCounter = specials[startingSpecial];
var queryCounter;
// the search branches out between special characters and normal characters
// that are found via consecutive character scanning. In the process of
// performing these scans, we discover that parts of the query will not match
// beyond a given point in the string. We keep track of that information
// in deadBranches, which has a slot for each character in the query.
// The value stored in the slot is the index into the string after which we
// are certain there is no match.
var deadBranches = [];
for (queryCounter = 0; queryCounter < query.length; queryCounter++) {
deadBranches[queryCounter] = Infinity;
}
queryCounter = 0;
var state = SPECIALS_MATCH;
// Compares the current character from the query string against the
// special characters in str. Returns true if a match was found,
// false otherwise.
function findMatchingSpecial() {
// used to loop through the specials
var i;
for (i = specialsCounter; i < specials.length; i++) {
// short circuit this search when we know there are no matches following
if (specials[i] >= deadBranches[queryCounter]) {
break;
}
// First, ensure that we're not comparing specials that
// come earlier in the string than our current search position.
// This can happen when the string position changes elsewhere.
if (specials[i] < strCounter) {
specialsCounter = i;
} else if (query[queryCounter] === str[specials[i]]) {
// we have a match! do the required tracking
strCounter = specials[i];
// Upper case match check:
// If the query and original string matched, but the original string
// and the lower case version did not, that means that the original
// was upper case.
var upper = originalQuery[queryCounter] === originalStr[strCounter] && originalStr[strCounter] !== str[strCounter];
result.push(new SpecialMatch(strCounter, upper));
specialsCounter = i;
queryCounter++;
strCounter++;
return true;
}
}
return false;
}
// This function implements the backtracking that is done when we fail to find
// a match with the query using the "search for specials first" approach.
//
// returns false when it is not able to backtrack successfully
function backtrack() {
// The idea is to pull matches off of our match list, rolling back
// characters from the query. We pay special attention to the special
// characters since they are searched first.
while (result.length > 0) {
var item = result.pop();
// nothing in the list? there's no possible match then.
if (!item) {
return false;
}
// we pulled off a match, which means that we need to put a character
// back into our query. strCounter is going to be set once we've pulled
// off the right special character and know where we're going to restart
// searching from.
queryCounter--;
if (item instanceof SpecialMatch) {
// pulled off a special, which means we need to make that special available
// for matching again
specialsCounter--;
// check to see if we've gone back as far as we need to
if (item.index < deadBranches[queryCounter]) {
// we now know that this part of the query does not match beyond this
// point
deadBranches[queryCounter] = item.index - 1;
// since we failed with the specials along this track, we're
// going to reset to looking for matches consecutively.
state = ANY_MATCH;
// we figure out where to start looking based on the new
// last item in the list. If there isn't anything else
// in the match list, we'll start over at the starting special
// (which is generally the beginning of the string, or the
// beginning of the last segment of the string)
item = result[result.length - 1];
if (!item) {
strCounter = specials[startingSpecial] + 1;
return true;
}
strCounter = item.index + 1;
return true;
}
}
}
return false;
}
while (true) {
// keep looping until we've either exhausted the query or the string
while (queryCounter < query.length && strCounter < str.length && strCounter <= deadBranches[queryCounter]) {
if (state === SPECIALS_MATCH) {
if (!findMatchingSpecial()) {
state = ANY_MATCH;
}
}
if (state === ANY_MATCH) {
// we look character by character for matches
if (query[queryCounter] === str[strCounter]) {
// got a match! record it, and switch back to searching specials
// See the specials section above for a comment on the expression
// for `upper` below.
var upper = originalQuery[queryCounter] === originalStr[strCounter] && originalStr[strCounter] !== str[strCounter];
result.push(new NormalMatch(strCounter++, upper));
queryCounter++;
state = SPECIALS_MATCH;
} else {
// no match, keep looking
strCounter++;
}
}
}
// if we've finished the query, or we haven't finished the query but we have no
// more backtracking we can do, then we're all done searching.
if (queryCounter >= query.length || (queryCounter < query.length && !backtrack())) {
break;
}
}
// return null when we don't find anything
if (queryCounter < query.length || result.length === 0) {
return null;
}
return result;
}
/*
* Seek out the best match in the last segment (generally the filename).
* Matches in the filename are preferred, but the query entered could match
* any part of the path. So, we find the best match we can get in the filename
* and then allow for searching the rest of the string with any characters that
* are left from the beginning of the query.
*
* The parameters and return value are the same as for getMatchRanges,
* except this function is always working on the last segment and the
* result can optionally include a remainder, which is the characters
* at the beginning of the query which did not match in the last segment.
*
* @param {string} query the search string (generally lower cased)
* @param {string} str the string to compare with (generally lower cased)
* @param {string} originalQuery the "non-normalized" query string (used to detect case match priority)
* @param {string} originalStr the "non-normalized" string to compare with (used to detect case match priority)
* @param {Array} specials list of special indexes in str (from findSpecialCharacters)
* @param {int} startingSpecial index into specials array to start scanning with
* @param {boolean} lastSegmentStart which character does the last segment start at
* @return {{remainder:int, matchList:Array.<SpecialMatch|NormalMatch>}} matched indexes or null if no matches possible
*/
function _lastSegmentSearch(query, str, originalQuery, originalStr, specials, startingSpecial, lastSegmentStart) {
var queryCounter, matchList;
// It's possible that the query is longer than the last segment.
// If so, we can chop off the bit that we know couldn't possibly be there.
var remainder = "",
originalRemainder = "",
extraCharacters = specials[startingSpecial] + query.length - str.length;
if (extraCharacters > 0) {
remainder = query.substring(0, extraCharacters);
originalRemainder = originalQuery.substring(0, extraCharacters);
query = query.substring(extraCharacters);
originalQuery = originalQuery.substring(extraCharacters);
}
for (queryCounter = 0; queryCounter < query.length; queryCounter++) {
matchList = _generateMatchList(query.substring(queryCounter),
str, originalQuery.substring(queryCounter),
originalStr, specials, startingSpecial);
// if we've got a match *or* there are no segments in this string, we're done
if (matchList || startingSpecial === 0) {
break;
}
}
if (queryCounter === query.length || !matchList) {
return null;
} else {
return {
remainder: remainder + query.substring(0, queryCounter),
originalRemainder: originalRemainder + originalQuery.substring(0, queryCounter),
matchList: matchList
};
}
}
/*
* Implements the top-level search algorithm. Search the last segment first,
* then search the rest of the string with the remainder.
*
* The parameters and return value are the same as for getMatchRanges.
*
* @param {string} queryLower the search string (will be searched lower case)
* @param {string} compareLower the lower-cased string to search
* @param {string} originalQuery the "non-normalized" query string (used to detect case match priority)
* @param {string} originalStr the "non-normalized" string to compare with (used to detect case match priority)
* @param {Array} specials list of special indexes in str (from findSpecialCharacters)
* @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible
*/
function _wholeStringSearch(queryLower, compareLower, originalQuery, originalStr, specials, lastSegmentSpecialsIndex) {
var lastSegmentStart = specials[lastSegmentSpecialsIndex];
var result;
var matchList;
result = _lastSegmentSearch(queryLower, compareLower, originalQuery, originalStr, specials, lastSegmentSpecialsIndex, lastSegmentStart);
if (result) {
matchList = result.matchList;
// Do we have more query characters that did not fit?
if (result.remainder) {
// Scan with the remainder only through the beginning of the last segment
var remainderMatchList = _generateMatchList(result.remainder,
compareLower.substring(0, lastSegmentStart),
result.originalRemainder,
originalStr.substring(0, lastSegmentStart),
specials.slice(0, lastSegmentSpecialsIndex), 0);
if (remainderMatchList) {
// add the new matched ranges to the beginning of the set of ranges we had
matchList.unshift.apply(matchList, remainderMatchList);
} else {
// no match
return null;
}
}
} else {
// No match in the last segment, so we start over searching the whole
// string
matchList = _generateMatchList(queryLower, compareLower, originalQuery, originalStr, specials, 0);
}
return matchList;
}
/**
* Converts a list of matches into a form suitable for returning from stringMatch.
*
* @param {Array.<SpecialMatch|NormalMatch>} matchList to convert
* @param {string} original string
* @param {int} character index where last segment begins
* @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score
*/
function _computeRangesAndScore(matchList, str, lastSegmentStart) {
var matchCounter;
var ranges = [];
var lastMatchIndex = -1;
var lastSegmentScore = 0;
var currentRangeStartedOnSpecial = false;
var score = 0;
var scoreDebug;
if (DEBUG_SCORES) {
scoreDebug = {
special: 0,
match: 0,
upper: 0,
lastSegment: 0,
beginning: 0,
lengthDeduction: 0,
consecutive: 0,
notStartingOnSpecial: 0
};
}
var currentRange = null;
// Records the current range and adds any additional ranges required to
// get to character index c. This function is called before starting a new range
// or returning from the function.
function closeRangeGap(c) {
// Close the current range
if (currentRange) {
currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart;
if (currentRange.matched && currentRange.includesLastSegment) {
if (DEBUG_SCORES) {
scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST;
}
score += lastSegmentScore * LAST_SEGMENT_BOOST;
}
if (currentRange.matched && !currentRangeStartedOnSpecial) {
if (DEBUG_SCORES) {
scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY;
}
score -= NOT_STARTING_ON_SPECIAL_PENALTY;
}
ranges.push(currentRange);
}
// If there was space between the new range and the last,
// add a new unmatched range before the new range can be added.
if (lastMatchIndex + 1 < c) {
ranges.push({
text: str.substring(lastMatchIndex + 1, c),
matched: false,
includesLastSegment: c > lastSegmentStart
});
}
currentRange = null;
lastSegmentScore = 0;
}
// In some cases (see the use of this variable below), we accelerate the
// bonus the more consecutive matches there are.
var numConsecutive = 0;
// Adds a matched character to the appropriate range
function addMatch(match) {
// Pull off the character index
var c = match.index;
var newPoints = 0;
// A match means that we need to do some scoring bookkeeping.
// Start with points added for any match
if (DEBUG_SCORES) {
scoreDebug.match += MATCH_POINTS;
}
newPoints += MATCH_POINTS;
if (match.upper) {
if (DEBUG_SCORES) {
scoreDebug.upper += UPPER_CASE_MATCH;
}
newPoints += UPPER_CASE_MATCH;
}
// A bonus is given for characters that match at the beginning
// of the filename
if (c === lastSegmentStart) {
if (DEBUG_SCORES) {
scoreDebug.beginning += BEGINNING_OF_NAME_POINTS;
}
newPoints += BEGINNING_OF_NAME_POINTS;
}
// If the new character immediately follows the last matched character,
// we award the consecutive matches bonus. The check for score > 0
// handles the initial value of lastMatchIndex which is used for
// constructing ranges but we don't yet have a true match.
if (score > 0 && lastMatchIndex + 1 === c) {
// Continue boosting for each additional match at the beginning
// of the name
if (c - numConsecutive === lastSegmentStart) {
if (DEBUG_SCORES) {
scoreDebug.beginning += BEGINNING_OF_NAME_POINTS;
}
newPoints += BEGINNING_OF_NAME_POINTS;
}
numConsecutive++;
var boost = CONSECUTIVE_MATCHES_POINTS * numConsecutive;
// Consecutive matches that started on a special are a
// good indicator of intent, so we award an added bonus there.
if (currentRangeStartedOnSpecial) {
boost = boost * 2;
}
if (DEBUG_SCORES) {
scoreDebug.consecutive += boost;
}
newPoints += boost;
} else {
numConsecutive = 1;
}
// add points for "special" character matches
if (match instanceof SpecialMatch) {
if (DEBUG_SCORES) {
scoreDebug.special += SPECIAL_POINTS;
}
newPoints += SPECIAL_POINTS;
}
score += newPoints;
// points accumulated in the last segment get an extra bonus
if (c >= lastSegmentStart) {
lastSegmentScore += newPoints;
}
// if the last range wasn't a match or there's a gap, we need to close off
// the range to start a new one.
if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) {
closeRangeGap(c);
}
lastMatchIndex = c;
// set up a new match range or add to the current one
if (!currentRange) {
currentRange = {
text: str[c],
matched: true
};
// Check to see if this new matched range is starting on a special
// character. We penalize those ranges that don't, because most
// people will search on the logical boundaries of the name
currentRangeStartedOnSpecial = match instanceof SpecialMatch;
} else {
currentRange.text += str[c];
}
}
// scan through the matches, adding each one in turn
for (matchCounter = 0; matchCounter < matchList.length; matchCounter++) {
var match = matchList[matchCounter];
addMatch(match);
}
// add a range for the last part of the string
closeRangeGap(str.length);
// shorter strings that match are often better than longer ones
var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH);
if (DEBUG_SCORES) {
scoreDebug.lengthDeduction = lengthPenalty;
}
score = score + lengthPenalty;
var result = {
ranges: ranges,
matchGoodness: score
};
if (DEBUG_SCORES) {
result.scoreDebug = scoreDebug;
}
return result;
}
/*
* If we short circuit normal matching to produce a prefix match,
* this function will generate the appropriate SearchResult.
* This function assumes that the prefix match check has already
* been performed.
*
* @param {string} str The string with the prefix match for the query
* @param {string} query The query that matched the beginning of str
* @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} ranges has a matching range for beginning of str
* and a non-matching range for the end of the str
* the score is -Number.MAX_VALUE in all cases
*/
function _prefixMatchResult(str, query) {
var result = new SearchResult(str);
result.matchGoodness = -Number.MAX_VALUE;
if (str.substr(0, query.length) !== query) {
// Penalize for not matching case
result.matchGoodness *= 0.5;
}
if (DEBUG_SCORES) {
result.scoreDebug = {
beginning: -result.matchGoodness
};
}
result.stringRanges = [{
text: str.substr(0, query.length),
matched: true,
includesLastSegment: true
}];
if (str.length > query.length) {
result.stringRanges.push({
text: str.substring(query.length),
matched: false,
includesLastSegment: true
});
}
return result;
}
/*
* Match str against the query using the QuickOpen algorithm provided by
* the functions above. The general idea is to prefer matches of "special" characters and,
* optionally, matches that occur in the "last segment" (generally, the filename). stringMatch
* will try to provide the best match and produces a "matchGoodness" score
* to allow for relative ranking.
*
* The result object returned includes "stringRanges" which can be used to highlight
* the matched portions of the string, in addition to the "matchGoodness"
* mentioned above. If DEBUG_SCORES is true, scoreDebug is set on the result
* to provide insight into the score.
*
* The matching is done in a case-insensitive manner.
*
* @param {string} str The string to search
* @param {string} query The query string to find in string
* @param {{preferPrefixMatches:?boolean, segmentedSearch:?boolean}} options to control search behavior.
* preferPrefixMatches puts an exact case-insensitive prefix match ahead of all other matches,
* even short-circuiting the match logic. This option implies segmentedSearch=false.
* When segmentedSearch is true, the string is broken into segments by "/" characters
* and the last segment is searched first and matches there are scored higher.
* @param {?Object} special (optional) the specials data from findSpecialCharacters, if already known
* This is generally just used by StringMatcher for optimization.
* @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score
*/
function stringMatch(str, query, options, special) {
var result;
options = options || {};
// No query? Short circuit the normal work done and just
// return a single range that covers the whole string.
if (!query) {
result = new SearchResult(str);
result.matchGoodness = 0;
if (DEBUG_SCORES) {
result.scoreDebug = {};
}
result.stringRanges = [{
text: str,
matched: false,
includesLastSegment: true
}];
return result;
}
// comparisons are case insensitive, so switch to lower case here
var queryLower = query.toLowerCase();
var compareLower = str.toLowerCase();
if (options.preferPrefixMatches) {
options.segmentedSearch = false;
}
if (options.preferPrefixMatches && compareLower.substr(0, queryLower.length) === queryLower) {
// NOTE: we compare against the case insensitive match
// above but we pass the case-sensitive version in
// because we want to weight the match to give case-matches
// a higher score
return _prefixMatchResult(str, query);
}
// Locate the special characters and then use orderedCompare to do the real
// work.
if (!special) {
special = findSpecialCharacters(str);
}
var lastSegmentStart, matchList;
// For strings that are not broken into multiple segments, we can potentially
// avoid some extra work
if (options.segmentedSearch) {
lastSegmentStart = special.specials[special.lastSegmentSpecialsIndex];
matchList = _wholeStringSearch(queryLower, compareLower, query, str, special.specials,
special.lastSegmentSpecialsIndex);
} else {
lastSegmentStart = 0;
matchList = _generateMatchList(queryLower, compareLower, query, str, special.specials, 0);
}
// If we get a match, turn this into a SearchResult as expected by the consumers
// of this API.
if (matchList) {
var compareData = _computeRangesAndScore(matchList, str, lastSegmentStart);
result = new SearchResult(str);
result.stringRanges = compareData.ranges;
result.matchGoodness = -1 * compareData.matchGoodness;
if (DEBUG_SCORES) {
result.scoreDebug = compareData.scoreDebug;
}
}
return result;
}
/**
* Sorts an array of SearchResult objects on a primary field, followed by secondary fields
* in case of ties. 'fieldSpec' provides the priority order for fields, where the first entry is the primary field, for example:
* multiFieldSort(bugList, [ "milestone", "severity" ]);
* Would sort a bug list by milestone, and within each milestone sort bugs by severity.
*
* fieldSpec can also include comparator functions of the form normally used by the sort()
* function.
*
* Any fields that have a string value are compared case-insensitively. Fields used should be
* present on all SearchResult objects (no optional/undefined fields).
*
* @param {!Array.<SearchResult>} searchResults
* @param {!Array.<string, function>} fieldSpec
*/
function multiFieldSort(searchResults, fieldSpec) {
// Move field names into an array, with primary field first
var comparisons;
if (Array.isArray(fieldSpec)) {
comparisons = fieldSpec;
} else {
// TODO Deprecate this form of calling this function
comparisons = [];
_.forEach(fieldSpec, function (priority, key) {
comparisons[priority] = key;
});
}
searchResults.sort(function (a, b) {
var priority;
for (priority = 0; priority < comparisons.length; priority++) {
var comparison = comparisons[priority];
if (typeof comparison === "function") {
var result = comparison(a, b);
if (result) {
return result;
}
} else {
var valueA = a[comparison];
var valueB = b[comparison];
if (typeof valueA === "string") {
valueA = valueA.toLowerCase();
valueB = valueB.toLowerCase();
}
if (valueA < valueB) {
return -1;
} else if (valueA > valueB) {
return 1;
}
}
// otherwise, move on to next sort priority
}
return 0; // all sort fields are equal
});
}
/**
* Sorts search results generated by stringMatch(): results are sorted into several
* tiers based on how well they matched the search query, then sorted alphabetically
* within each tier.
*/
function basicMatchSort(searchResults) {
multiFieldSort(searchResults, { matchGoodness: 0, label: 1 });
}
/**
* A StringMatcher provides an interface to the stringMatch function with built-in
* caching. You should use a StringMatcher for the lifetime of queries over a
* single data set.
*
* You are free to store other data on this object to assist in higher-level caching.
* (This object's caches are all stored in "_" prefixed properties.)
*
* @param {{preferPrefixMatches:?boolean, segmentedSearch:?boolean}} options to control search behavior.
* preferPrefixMatches puts an exact case-insensitive prefix match ahead of all other matches,
* even short-circuiting the match logic. This option implies segmentedSearch=false.
* segmentedSearch treats segments of the string specially.
*/
function StringMatcher(options) {
this.options = options;
this.reset();
}
/**
* Map from search-result string to the findSpecialCharacters() result for that string - easy to cache
* since this info doesn't change as the query changes.
* @type {Object.<string, {specials:Array.<number>, lastSegmentSpecialsIndex:number}>}
*/
StringMatcher.prototype._specialsCache = null;
/**
* Set of search-result strings that we know don't match the query _lastQuery - or any other query with
* that prefix.
* @type {Object.<string, boolean>}
*/
StringMatcher.prototype._noMatchCache = null;
/**
* Clears the caches. Use this in the event that the caches may be invalid.
*/
StringMatcher.prototype.reset = function () {
// We keep track of the last query to know when we need to invalidate.
this._lastQuery = null;
this._specialsCache = {};
this._noMatchCache = {};
};
/**
* Performs a single match using the stringMatch function. See stringMatch for full documentation.
*
* @param {string} str The string to search
* @param {string} query The query string to find in string
* @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score
*/
StringMatcher.prototype.match = function (str, query) {
// If the query is not just added characters from the previous query, we invalidate
// the no match cache and will re-match everything.
if (this._lastQuery !== null && (this._lastQuery !== query.substring(0, this._lastQuery.length))) {
this._noMatchCache = {};
}
this._lastQuery = query;
// Check for a known non-matching string.
if (_.has(this._noMatchCache, str)) {
return undefined;
}
// Load up the cached specials information (or build it if this is our first time through).
var special = _.has(this._specialsCache, str) ? this._specialsCache[str] : undefined;
if (special === undefined) {
special = findSpecialCharacters(str);
this._specialsCache[str] = special;
}
var result = stringMatch(str, query, this.options, special);
// If this query was not a match, we cache that fact for next time.
if (!result) {
this._noMatchCache[str] = true;
}
return result;
};
exports._findSpecialCharacters = findSpecialCharacters;
exports._wholeStringSearch = _wholeStringSearch;
exports._lastSegmentSearch = _lastSegmentSearch;
exports._setDebugScores = _setDebugScores;
exports._generateMatchList = _generateMatchList;
exports._SpecialMatch = SpecialMatch;
exports._NormalMatch = NormalMatch;
exports._computeRangesAndScore = _computeRangesAndScore;
// public exports
exports.SearchResult = SearchResult;
exports.stringMatch = stringMatch;
exports.basicMatchSort = basicMatchSort;
exports.multiFieldSort = multiFieldSort;
exports.StringMatcher = StringMatcher;
});