-
Notifications
You must be signed in to change notification settings - Fork 29.2k
/
normalization.ts
49 lines (40 loc) · 1.55 KB
/
normalization.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { LRUCache } from 'vs/base/common/map';
const nfcCache = new LRUCache<string, string>(10000); // bounded to 10000 elements
export function normalizeNFC(str: string): string {
return normalize(str, 'NFC', nfcCache);
}
const nfdCache = new LRUCache<string, string>(10000); // bounded to 10000 elements
export function normalizeNFD(str: string): string {
return normalize(str, 'NFD', nfdCache);
}
const nonAsciiCharactersPattern = /[^\u0000-\u0080]/;
function normalize(str: string, form: string, normalizedCache: LRUCache<string, string>): string {
if (!str) {
return str;
}
const cached = normalizedCache.get(str);
if (cached) {
return cached;
}
let res: string;
if (nonAsciiCharactersPattern.test(str)) {
res = str.normalize(form);
} else {
res = str;
}
// Use the cache for fast lookup
normalizedCache.set(str, res);
return res;
}
export const removeAccents: (str: string) => string = (function () {
// transform into NFD form and remove accents
// see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
const regex = /[\u0300-\u036f]/g;
return function (str: string) {
return normalizeNFD(str).replace(regex, '');
};
})();