-
Notifications
You must be signed in to change notification settings - Fork 632
/
Copy pathunstable_slugify.ts
126 lines (120 loc) · 4.15 KB
/
unstable_slugify.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
// Shared word-granularity segmenter that `slugify` uses to split its input into
// word-like segments and separators. Built once at module scope and reused by
// every call.
const wordSegmenter = new Intl.Segmenter("en-US", { granularity: "word" });
/**
 * Options for {@linkcode slugify}.
 *
 * `slugify` takes these as `Partial<SlugifyOptions>`, so each field may be
 * omitted.
 */
export type SlugifyOptions = {
/**
 * The regular expression whose matches are removed from the slug.
 *
 * It is applied to text that has already been trimmed, normalized to NFD and
 * lower-cased, so accented letters appear as a base letter followed by
 * combining marks.
 * @default {NON_WORD}
 */
strip: RegExp;
/**
 * The transliteration function to use for converting non-Latin text.
 *
 * When provided, it is called on each piece collected from the input (the
 * word-like segments as well as the `-` placeholders that stand in for
 * separators) and the results are joined with dashes. When omitted, the
 * pieces are used unchanged.
 * @default {undefined}
 */
transliterate: ((word: string) => string) | undefined;
};
/**
 * A regular expression for stripping non-word characters from slugs.
 *
 * Matches every run of characters that are not Unicode letters (`\p{L}`),
 * combining marks (`\p{M}`), numbers (`\p{N}`) or hyphens, so letters of any
 * script and their diacritics are kept. This is the pattern `slugify` uses
 * when no `strip` option is given.
 *
 * @example Usage
 * ```ts
 * import { NON_WORD, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: NON_WORD }), "déjà-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: NON_WORD }), "συστημάτων-γραφής");
 * ```
 */
export const NON_WORD = /[^\p{L}\p{M}\p{N}\-]+/gu;
/**
 * A regular expression for stripping diacritics from slugs.
 *
 * Matches every run of characters that are not Unicode letters (`\p{L}`),
 * numbers (`\p{N}`) or hyphens. Unlike `NON_WORD`, combining marks (`\p{M}`)
 * are not kept. Because `slugify` decomposes its input to NFD before
 * stripping, this removes the accents of every script while leaving the base
 * letters in place; other non-word characters are removed as well.
 *
 * @example Usage
 * ```ts
 * import { DIACRITICS, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: DIACRITICS }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: DIACRITICS }), "συστηματων-γραφης");
 * ```
 */
export const DIACRITICS = /[^\p{L}\p{N}\-]+/gu;
/**
 * A regular expression for stripping diacritics attached to ASCII letters
 * (but not other diacritics) from slugs.
 *
 * The first alternative uses a lookbehind to match combining marks (`\p{M}`)
 * that directly follow an ASCII letter; the second alternative matches the
 * same runs of non-word characters as `NON_WORD`. Marks that follow a
 * non-ASCII letter (for example Greek) are therefore kept.
 *
 * @example Usage
 * ```ts
 * import { ASCII_DIACRITICS, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: ASCII_DIACRITICS }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: ASCII_DIACRITICS }), "συστημάτων-γραφής");
 * ```
 */
export const ASCII_DIACRITICS = /(?<=[a-zA-Z])\p{M}+|[^\p{L}\p{M}\p{N}\-]+/gu;
/**
 * A regular expression for stripping non-ASCII characters from slugs.
 *
 * Matches every character other than the ASCII digits, ASCII letters and the
 * hyphen. Input written entirely in a non-Latin script is stripped completely,
 * in which case `slugify` falls back to returning `"-"` (second assertion
 * below); combine this pattern with the `transliterate` option to keep such
 * text.
 *
 * @example Usage
 * ```ts
 * import { NON_ASCII, slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 * assertEquals(slugify("déjà-vu", { strip: NON_ASCII }), "deja-vu");
 * assertEquals(slugify("Συστημάτων Γραφής", { strip: NON_ASCII }), "-");
 * ```
 */
export const NON_ASCII = /[^0-9a-zA-Z\-]/g;
/**
 * Converts a string into a {@link https://en.wikipedia.org/wiki/Clean_URL#Slug | slug}.
 *
 * The input is trimmed, decomposed to NFD and lower-cased, then split into
 * segments with a word-granularity `Intl.Segmenter`. Word-like segments are
 * kept and every other non-empty segment becomes a `-` placeholder. The pieces
 * are optionally transliterated and joined, everything matching `strip` is
 * removed, the text is recomposed to NFC, runs of dashes are collapsed into
 * one and a leading or trailing dash is dropped. If nothing remains, `"-"` is
 * returned so the result is never empty.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param input The string that is going to be converted into a slug
 * @param options The options for the slugify function. `strip` defaults to
 * `NON_WORD` and is always applied globally, whether or not the supplied
 * regular expression carries the `g` flag. `transliterate`, when given, is
 * called with exactly one argument (the piece to convert).
 * @returns The string as a slug, or `"-"` when every character was stripped
 *
 * @example Basic usage
 * ```ts
 * import { slugify } from "@std/text/unstable-slugify";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(slugify("Hello, world!"), "hello-world");
 * assertEquals(slugify("Συστημάτων Γραφής"), "συστημάτων-γραφής");
 * ```
 *
 * @example With transliteration using a third-party library
 * ```ts no-eval
 * import { NON_ASCII, slugify } from "@std/text/unstable-slugify";
 * // example third-party transliteration library
 * import transliterate from "npm:any-ascii";
 *
 * slugify("Συστημάτων Γραφής", { transliterate, strip: NON_ASCII });
 * // => "sistimaton-grafis"
 * ```
 */
export function slugify(
  input: string,
  options?: Partial<SlugifyOptions>,
): string {
  const strip = options?.strip ?? NON_WORD;
  const transliterate = options?.transliterate;

  // Work on a clone so the caller's RegExp (and its `lastIndex`) is never
  // touched, and force the `g` flag: `String.prototype.replaceAll` throws a
  // TypeError when handed a non-global regular expression.
  const stripRe = new RegExp(
    strip,
    strip.flags.includes("g") ? strip.flags : `${strip.flags}g`,
  );

  // NFD splits accented letters into base letter + combining marks so that
  // `strip` patterns can target the marks on their own.
  const prepared = input.trim().normalize("NFD").toLowerCase();

  const pieces: string[] = [];
  for (const { segment, isWordLike } of wordSegmenter.segment(prepared)) {
    if (isWordLike) {
      pieces.push(segment);
    } else if (segment.length) {
      // Whitespace and punctuation only mark a word boundary.
      pieces.push("-");
    }
  }

  // Call the transliterator with the piece only. Handing it to `map` directly
  // would also pass the index and the array, which a function with optional
  // extra parameters would misread as real arguments.
  const joined = transliterate
    ? pieces.map((piece) => transliterate(piece)).join("-")
    : pieces.join("");

  const slug = joined
    .replaceAll(stripRe, "")
    .normalize("NFC")
    .replaceAll(/-{2,}/g, "-")
    .replaceAll(/^-|-$/g, "");

  // `||` is intentional: an empty string is the case that needs the fallback.
  return slug || "-";
}