Skip to content

Commit 3b51563

Browse files
committed
Add use_cls_token to default dict.
1 parent 95fe8da commit 3b51563

File tree

5 files changed

+15
-1
lines changed

5 files changed

+15
-1
lines changed

rasa/nlu/tokenizers/jieba_tokenizer.py

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ class JiebaTokenizer(Tokenizer):
3636
"intent_tokenization_flag": False,
3737
# Symbol on which intent should be split
3838
"intent_split_symbol": "_",
39+
# add __CLS__ token to the end of the list of tokens
40+
"use_cls_token": True,
3941
} # by default, no custom dictionary is loaded
4042

4143
def __init__(self, component_config: Dict[Text, Any] = None) -> None:

rasa/nlu/tokenizers/mitie_tokenizer.py

+5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ class MitieTokenizer(Tokenizer):
1616

1717
provides = [MESSAGE_TOKENS_NAMES[attribute] for attribute in MESSAGE_ATTRIBUTES]
1818

19+
defaults = {
20+
# add __CLS__ token to the end of the list of tokens
21+
"use_cls_token": True
22+
}
23+
1924
@classmethod
2025
def required_packages(cls) -> List[Text]:
2126
return ["mitie"]

rasa/nlu/tokenizers/spacy_tokenizer.py

+5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ class SpacyTokenizer(Tokenizer):
2828
for attribute in SPACY_FEATURIZABLE_ATTRIBUTES
2929
]
3030

31+
defaults = {
32+
# add __CLS__ token to the end of the list of tokens
33+
"use_cls_token": True
34+
}
35+
3136
def train(
3237
self, training_data: TrainingData, config: RasaNLUModelConfig, **kwargs: Any
3338
) -> None:

rasa/nlu/tokenizers/tokenizer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
2929
if "use_cls_token" in self.component_config:
3030
self.use_cls_token = self.component_config["use_cls_token"]
3131
else:
32-
self.use_cls_token = False
32+
self.use_cls_token = True
3333

3434
def add_cls_token(
3535
self, tokens: List[Token], attribute: Text = MESSAGE_TEXT_ATTRIBUTE

rasa/nlu/tokenizers/whitespace_tokenizer.py

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ class WhitespaceTokenizer(Tokenizer):
2424
"intent_split_symbol": "_",
2525
# Text is tokenized case-sensitively by default
2626
"case_sensitive": True,
27+
# add __CLS__ token to the end of the list of tokens
28+
"use_cls_token": True,
2729
}
2830

2931
def __init__(self, component_config: Dict[Text, Any] = None) -> None:

0 commit comments

Comments
 (0)