-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtokenizer-models.js
42 lines (42 loc) · 912 Bytes
/
tokenizer-models.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/**
 * List of tokenizer model descriptors exported by this module.
 *
 * Every element is a plain object with exactly two keys, in this order:
 *   - `model_name` {string}: an "owner/name" style model identifier.
 *   - `max_tokens` {number}: the token limit recorded for that model;
 *     every entry in this list currently uses 512.
 *
 * The entries are produced from the identifier list below so the shared
 * limit is written once; the resulting array has the same order and the
 * same contents as a hand-written literal.
 *
 * @type {Array<{ model_name: string, max_tokens: number }>}
 */
export const tokenizerModels = [
  "Xenova/all-MiniLM-L6-v2",
  "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
  "Xenova/bert-base-uncased",
  "Xenova/gpt2",
  "Xenova/roberta-base",
  "Xenova/all-distilroberta-v1",
  "Xenova/multilingual-e5-large",
  "Xenova/bert-base-multilingual-uncased",
  "Xenova/xlm-roberta-base",
  "BAAI/bge-base-en-v1.5",
].map((model_name) => ({ model_name, max_tokens: 512 }));