-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathregistry.py
218 lines (191 loc) · 8.15 KB
/
registry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""This is a somewhat delicate package. It contains all registered components
and preconfigured templates.
Hence, it imports all of the components. To avoid cycles, no component should
import this in module scope."""
import logging
import typing
from typing import Any, Dict, List, Optional, Text, Type
from rasa.nlu.classifiers.embedding_intent_classifier import EmbeddingIntentClassifier
from rasa.nlu.classifiers.keyword_intent_classifier import KeywordIntentClassifier
from rasa.nlu.classifiers.mitie_intent_classifier import MitieIntentClassifier
from rasa.nlu.classifiers.sklearn_intent_classifier import SklearnIntentClassifier
from rasa.nlu.classifiers.custom_intent_classifier import CustomIntentClassifier
from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor
from rasa.nlu.extractors.duckling_http_extractor import DucklingHTTPExtractor
from rasa.nlu.extractors.entity_synonyms import EntitySynonymMapper
from rasa.nlu.extractors.mitie_entity_extractor import MitieEntityExtractor
from rasa.nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor
from rasa.nlu.extractors.custom_extractors import ScriptExtractor
from rasa.nlu.extractors.custom_extractors import LatinTextExtractor
from rasa.nlu.extractors.custom_extractors import LanguageExtractor
from rasa.nlu.featurizers.count_vectors_featurizer import CountVectorsFeaturizer
from rasa.nlu.featurizers.mitie_featurizer import MitieFeaturizer
from rasa.nlu.featurizers.ngram_featurizer import NGramFeaturizer
from rasa.nlu.featurizers.regex_featurizer import RegexFeaturizer
from rasa.nlu.featurizers.spacy_featurizer import SpacyFeaturizer
from rasa.nlu.featurizers.custom_featurizer import CustomFeaturizer
from rasa.nlu.model import Metadata
from rasa.nlu.tokenizers.jieba_tokenizer import JiebaTokenizer
from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer
from rasa.nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
from rasa.nlu.utils.mitie_utils import MitieNLP
from rasa.nlu.utils.spacy_utils import SpacyNLP
from rasa.utils.common import class_from_module_path
if typing.TYPE_CHECKING:
    # Only needed for the string annotations ("Component",
    # "RasaNLUModelConfig") used in the signatures of this module, so they
    # are not imported at runtime.
    from rasa.nlu.components import Component
    from rasa.nlu.config import RasaNLUModelConfig
# Module-level logger; `get_component_class` uses it to warn about
# deprecated old-style component names.
logger = logging.getLogger(__name__)
# Classes of all known components. If a new component should be available
# through name-based lookup, its class has to be listed here.
component_classes = [
# utils
SpacyNLP,
MitieNLP,
# tokenizers
MitieTokenizer,
SpacyTokenizer,
WhitespaceTokenizer,
JiebaTokenizer,
# extractors
SpacyEntityExtractor,
MitieEntityExtractor,
CRFEntityExtractor,
DucklingHTTPExtractor,
EntitySynonymMapper,
ScriptExtractor,
LatinTextExtractor,
LanguageExtractor,
# featurizers
SpacyFeaturizer,
MitieFeaturizer,
NGramFeaturizer,
RegexFeaturizer,
CountVectorsFeaturizer,
CustomFeaturizer,
# classifiers
SklearnIntentClassifier,
MitieIntentClassifier,
KeywordIntentClassifier,
EmbeddingIntentClassifier,
CustomIntentClassifier,
]
# Mapping from a component's name (its class-level `name` attribute) to its
# class, to allow name-based lookup. If two classes shared the same name, the
# one listed later in `component_classes` would win.
registered_components = {c.name: c for c in component_classes}
# DEPRECATED: kept for backwards compatibility, will be removed in future
# versions. Maps an old-style component name to the name under which the
# component is looked up in `registered_components`.
# NOTE(review): the values are class names, so this presumably relies on each
# component's `name` attribute being equal to its class name - confirm.
old_style_names = {
"nlp_spacy": "SpacyNLP",
"nlp_mitie": "MitieNLP",
"ner_spacy": "SpacyEntityExtractor",
"ner_mitie": "MitieEntityExtractor",
"ner_crf": "CRFEntityExtractor",
"ner_duckling_http": "DucklingHTTPExtractor",
"script_extractor": "ScriptExtractor",
"latin_text_extractor": "LatinTextExtractor",
"language_extractor": "LanguageExtractor",
"ner_synonyms": "EntitySynonymMapper",
"intent_featurizer_spacy": "SpacyFeaturizer",
"intent_featurizer_mitie": "MitieFeaturizer",
"intent_featurizer_ngrams": "NGramFeaturizer",
"intent_entity_featurizer_regex": "RegexFeaturizer",
"intent_featurizer_count_vectors": "CountVectorsFeaturizer",
"custom_featurizer": "CustomFeaturizer",
"tokenizer_mitie": "MitieTokenizer",
"tokenizer_spacy": "SpacyTokenizer",
"tokenizer_whitespace": "WhitespaceTokenizer",
"tokenizer_jieba": "JiebaTokenizer",
"intent_classifier_sklearn": "SklearnIntentClassifier",
"intent_classifier_mitie": "MitieIntentClassifier",
"intent_classifier_keyword": "KeywordIntentClassifier",
"intent_classifier_tensorflow_embedding": "EmbeddingIntentClassifier",
"custom_intent_classifier": "CustomIntentClassifier",
}
# To simplify usage, there are a couple of model templates that already list
# the necessary components in the right order. They also implement
# the preexisting `backends`.
# Each entry maps a template name to an ordered list of component names;
# `pipeline_template` expands such a list into `{"name": ...}` dicts.
registered_pipeline_templates = {
"pretrained_embeddings_spacy": [
"SpacyNLP",
"SpacyTokenizer",
"SpacyFeaturizer",
"RegexFeaturizer",
"CRFEntityExtractor",
"EntitySynonymMapper",
"SklearnIntentClassifier",
],
"keyword": ["KeywordIntentClassifier"],
"supervised_embeddings": [
"WhitespaceTokenizer",
"RegexFeaturizer",
"CRFEntityExtractor",
"EntitySynonymMapper",
"CountVectorsFeaturizer",
"EmbeddingIntentClassifier",
],
"custom_pipeline": [
"ScriptExtractor",
"LatinTextExtractor",
"CustomFeaturizer",
"LanguageExtractor",
"CustomIntentClassifier", ]
}
def pipeline_template(s: Text) -> Optional[List[Dict[Text, Text]]]:
    """Expand a registered template name into a pipeline configuration.

    Looks up `s` in `registered_pipeline_templates` and wraps each listed
    component name in a `{"name": ...}` dict (one object per component, as
    expected by the configuration format).

    Returns `None` if no template is registered under `s` or if the
    registered component list is empty.
    """
    component_names = registered_pipeline_templates.get(s)
    if not component_names:
        return None

    pipeline = []
    for component_name in component_names:
        pipeline.append({"name": component_name})
    return pipeline
def get_component_class(component_name: Text) -> Type["Component"]:
    """Resolve a component name to a component class.

    The name is resolved in the following order:

    1. a name present in `registered_components`,
    2. a deprecated old-style name present in `old_style_names` (a
       deprecation warning is logged and the mapped name is looked up in
       `registered_components`),
    3. any other value is treated as a module path and loaded with
       `class_from_module_path`.

    Args:
        component_name: Registered name, old-style name or module path of
            the component class.

    Returns:
        The resolved component class.

    Raises:
        Exception: If the name is neither registered nor loadable as a
            module path. The original error is attached as `__cause__`.
    """
    if component_name in registered_components:
        return registered_components[component_name]

    if component_name in old_style_names:
        # DEPRECATED ensures compatibility, remove in future versions
        class_name = old_style_names[component_name]
        logger.warning(
            "DEPRECATION warning: your nlu config file "
            "contains old style component name `{}`, "
            "you should change it to its class name: `{}`."
            "".format(component_name, class_name)
        )
        return registered_components[class_name]

    try:
        return class_from_module_path(component_name)
    except Exception as e:
        # Chain explicitly so the underlying import/lookup failure is
        # reported as the direct cause of this error.
        raise Exception(
            "Failed to find component class for '{}'. Unknown "
            "component name. Check your configured pipeline and make "
            "sure the mentioned component is not misspelled. If you "
            "are creating your own component, make sure it is either "
            "listed as part of the `component_classes` in "
            "`rasa.nlu.registry.py` or is a proper name of a class "
            "in a module.".format(component_name)
        ) from e
def load_component_by_meta(
    component_meta: Dict[Text, Any],
    model_dir: Text,
    metadata: Metadata,
    cached_component: Optional["Component"],
    **kwargs: Any
) -> Optional["Component"]:
    """Resolve a component class and call its `load` method.

    Args:
        component_meta: Metadata of the component. The class is resolved
            from its "class" entry if present, otherwise from "name".
        model_dir: Passed through to the component's `load`.
        metadata: Passed through to the component's `load`.
        cached_component: Passed through to the component's `load`.
        **kwargs: Passed through to the component's `load`.

    Returns:
        Whatever the component class's `load` returns.

    Raises:
        KeyError: If `component_meta` has neither "class" nor "name".
        Exception: If the component class cannot be resolved.
    """
    # Prefer the explicit class entry, else resolve by name. A conditional is
    # used instead of `dict.get(key, default)` because the default expression
    # is evaluated eagerly and would raise `KeyError` for metadata that has a
    # "class" entry but no "name".
    if "class" in component_meta:
        component_name = component_meta["class"]
    else:
        component_name = component_meta["name"]

    component_class = get_component_class(component_name)
    return component_class.load(
        component_meta, model_dir, metadata, cached_component, **kwargs
    )
def create_component_by_config(
    component_config: Dict[Text, Any], config: "RasaNLUModelConfig"
) -> Optional["Component"]:
    """Resolve a component class and call its `create` method.

    Args:
        component_config: Configuration of the component. The class is
            resolved from its "class" entry if present, otherwise from
            "name".
        config: Passed through to the component's `create`.

    Returns:
        Whatever the component class's `create` returns.

    Raises:
        KeyError: If `component_config` has neither "class" nor "name".
        Exception: If the component class cannot be resolved.
    """
    # Prefer the explicit class entry, else resolve by name. A conditional is
    # used instead of `dict.get(key, default)` because the default expression
    # is evaluated eagerly and would raise `KeyError` for a configuration that
    # has a "class" entry but no "name".
    if "class" in component_config:
        component_name = component_config["class"]
    else:
        component_name = component_config["name"]

    component_class = get_component_class(component_name)
    return component_class.create(component_config, config)