import logging

import numpy as np
import tensorflow as tf

from pathlib import Path
from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type

import rasa.utils.io as io_utils
from rasa.nlu.config import InvalidConfigError
from rasa.nlu.training_data import TrainingData, Message
from rasa.nlu.components import Component
from rasa.nlu.featurizers.featurizer import Featurizer
from rasa.nlu.model import Metadata
from rasa.nlu.classifiers.diet_classifier import (
    DIETClassifier,
    DIET,
    TEXT_FEATURES,
    LABEL_FEATURES,
    LABEL_IDS,
    TEXT_SEQ_LENGTH,
    LABEL_SEQ_LENGTH,
)
from rasa.utils.tensorflow.constants import (
    LABEL,
    HIDDEN_LAYERS_SIZES,
    SHARE_HIDDEN_LAYERS,
    TRANSFORMER_SIZE,
    NUM_TRANSFORMER_LAYERS,
    NUM_HEADS,
    BATCH_SIZES,
    BATCH_STRATEGY,
    EPOCHS,
    RANDOM_SEED,
    LEARNING_RATE,
    DENSE_DIMENSION,
    RANKING_LENGTH,
    LOSS_TYPE,
    SIMILARITY_TYPE,
    NUM_NEG,
    SPARSE_INPUT_DROPOUT,
    MASKED_LM,
    ENTITY_RECOGNITION,
    INTENT_CLASSIFICATION,
    EVAL_NUM_EXAMPLES,
    EVAL_NUM_EPOCHS,
    UNIDIRECTIONAL_ENCODER,
    DROP_RATE,
    DROP_RATE_ATTENTION,
    WEIGHT_SPARSITY,
    NEGATIVE_MARGIN_SCALE,
    REGULARIZATION_CONSTANT,
    SCALE_LOSS,
    USE_MAX_NEG_SIM,
    MAX_NEG_SIM,
    MAX_POS_SIM,
    EMBEDDING_DIMENSION,
    BILOU_FLAG,
    KEY_RELATIVE_ATTENTION,
    VALUE_RELATIVE_ATTENTION,
    MAX_RELATIVE_POSITION,
    RETRIEVAL_INTENT,
    SOFTMAX,
    AUTO,
    BALANCED,
    TENSORBOARD_LOG_DIR,
    TENSORBOARD_LOG_LEVEL,
)
from rasa.nlu.constants import (
    RESPONSE,
    RESPONSE_SELECTOR_PROPERTY_NAME,
    RESPONSE_KEY_ATTRIBUTE,
    INTENT,
    DEFAULT_OPEN_UTTERANCE_TYPE,
    TEXT,
)
from rasa.utils.tensorflow.model_data import RasaModelData
from rasa.utils.tensorflow.models import RasaModel

logger = logging.getLogger(__name__)


class ResponseSelector(DIETClassifier):
"""Response selector using supervised embeddings.
The response selector embeds user inputs
and candidate response into the same space.
Supervised embeddings are trained by maximizing similarity between them.
It also provides rankings of the response that did not "win".
The supervised response selector needs to be preceded by
a featurizer in the pipeline.
This featurizer creates the features used for the embeddings.
It is recommended to use ``CountVectorsFeaturizer`` that
can be optionally preceded by ``SpacyNLP`` and ``SpacyTokenizer``.
Based on the starspace idea from: https://arxiv.org/abs/1709.03856.
However, in this implementation the `mu` parameter is treated differently
and additional hidden layers are added together with dropout.
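
    An illustrative minimal pipeline (a sketch only: ``faq`` is a hypothetical
    retrieval intent name, and ``retrieval_intent`` is the config key defined
    in this component's ``defaults`` below)::

        pipeline:
          - name: "WhitespaceTokenizer"
          - name: "CountVectorsFeaturizer"
          - name: "ResponseSelector"
            retrieval_intent: "faq"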
"""

    @classmethod
    def required_components(cls) -> List[Type[Component]]:
        return [Featurizer]

    defaults = {
        # ## Architecture of the used neural network
        # Hidden layer sizes for layers before the embedding layers for user message
        # and labels.
        # The number of hidden layers is equal to the length of the corresponding
        # list.
        HIDDEN_LAYERS_SIZES: {TEXT: [256, 128], LABEL: [256, 128]},
        # Whether to share the hidden layer weights between input words and responses
        SHARE_HIDDEN_LAYERS: False,
        # Number of units in transformer
        TRANSFORMER_SIZE: None,
        # Number of transformer layers
        NUM_TRANSFORMER_LAYERS: 0,
        # Number of attention heads in transformer
        NUM_HEADS: 4,
        # If 'True' use key relative embeddings in attention
        KEY_RELATIVE_ATTENTION: False,
        # If 'True' use value relative embeddings in attention
        VALUE_RELATIVE_ATTENTION: False,
        # Max position for relative embeddings
        MAX_RELATIVE_POSITION: None,
        # Use a unidirectional or bidirectional encoder.
        UNIDIRECTIONAL_ENCODER: False,
        # ## Training parameters
        # Initial and final batch sizes:
        # Batch size will be linearly increased for each epoch.
        BATCH_SIZES: [64, 256],
        # Strategy used when creating batches.
        # Can be either 'sequence' or 'balanced'.
        BATCH_STRATEGY: BALANCED,
        # Number of epochs to train
        EPOCHS: 300,
        # Set random seed to any 'int' to get reproducible results
        RANDOM_SEED: None,
        # Initial learning rate for the optimizer
        LEARNING_RATE: 0.001,
        # ## Parameters for embeddings
        # Dimension size of embedding vectors
        EMBEDDING_DIMENSION: 20,
        # Default dense dimension to use if no dense features are present.
        DENSE_DIMENSION: {TEXT: 512, LABEL: 512},
        # The number of incorrect labels. The algorithm will minimize
        # their similarity to the user input during training.
        NUM_NEG: 20,
        # Type of similarity measure to use: 'auto', 'cosine', or 'inner'.
        SIMILARITY_TYPE: AUTO,
        # The type of the loss function, either 'softmax' or 'margin'.
        LOSS_TYPE: SOFTMAX,
        # Number of top actions to normalize scores for loss type 'softmax'.
        # Set to 0 to turn off normalization.
        RANKING_LENGTH: 10,
        # Indicates how similar the algorithm should try to make embedding vectors
        # for correct labels.
        # Should be 0.0 < ... < 1.0 for 'cosine' similarity type.
        MAX_POS_SIM: 0.8,
        # Maximum negative similarity for incorrect labels.
        # Should be -1.0 < ... < 1.0 for 'cosine' similarity type.
        MAX_NEG_SIM: -0.4,
        # If 'True' the algorithm only minimizes maximum similarity over
        # incorrect intent labels, used only if 'loss_type' is set to 'margin'.
        USE_MAX_NEG_SIM: True,
        # Scale loss inversely proportional to the confidence of the correct
        # prediction
        SCALE_LOSS: True,
        # ## Regularization parameters
        # The scale of regularization
        REGULARIZATION_CONSTANT: 0.002,
        # Sparsity of the weights in dense layers
        WEIGHT_SPARSITY: 0.0,
        # The scale of how important it is to minimize the maximum similarity
        # between embeddings of different labels.
        NEGATIVE_MARGIN_SCALE: 0.8,
        # Dropout rate for encoder
        DROP_RATE: 0.2,
        # Dropout rate for attention
        DROP_RATE_ATTENTION: 0,
        # If 'True' apply dropout to sparse tensors
        SPARSE_INPUT_DROPOUT: False,
        # ## Evaluation parameters
        # How often to calculate validation accuracy.
        # Small values may hurt performance, e.g. model accuracy.
        EVAL_NUM_EPOCHS: 20,
        # How many examples to use for the hold-out validation set.
        # Large values may hurt performance, e.g. model accuracy.
        EVAL_NUM_EXAMPLES: 0,
        # ## Selector config
        # If 'True' random tokens of the input message will be masked and the model
        # should predict those tokens.
        MASKED_LM: False,
        # Name of the intent for which this response selector is to be trained
        RETRIEVAL_INTENT: None,
        # If you want to use tensorboard to visualize training and validation
        # metrics, set this option to a valid output directory.
        TENSORBOARD_LOG_DIR: None,
        # Define when training metrics for tensorboard should be logged.
        # Either after every epoch or for every training step.
        # Valid values: 'epoch' and 'minibatch'
        TENSORBOARD_LOG_LEVEL: "epoch",
    }

    def __init__(
        self,
        component_config: Optional[Dict[Text, Any]] = None,
        index_label_id_mapping: Optional[Dict[int, Text]] = None,
        index_tag_id_mapping: Optional[Dict[int, Text]] = None,
        model: Optional[RasaModel] = None,
        retrieval_intent_mapping: Optional[Dict[Text, Text]] = None,
    ) -> None:
        component_config = component_config or {}

        # the following properties cannot be adapted for the ResponseSelector
        component_config[INTENT_CLASSIFICATION] = True
        component_config[ENTITY_RECOGNITION] = False
        component_config[BILOU_FLAG] = None

        self.retrieval_intent_mapping = retrieval_intent_mapping or {}

        super().__init__(
            component_config, index_label_id_mapping, index_tag_id_mapping, model
        )

    @property
    def label_key(self) -> Text:
        return LABEL_IDS

    @staticmethod
    def model_class() -> Type[RasaModel]:
        return DIET2DIET

    def _load_selector_params(self, config: Dict[Text, Any]) -> None:
        self.retrieval_intent = config[RETRIEVAL_INTENT]

    def _check_config_parameters(self) -> None:
        super()._check_config_parameters()
        self._load_selector_params(self.component_config)

    @staticmethod
    def _create_retrieval_intent_mapping(
        training_data: TrainingData,
    ) -> Dict[Text, Text]:
        """Create a mapping from response texts to their full retrieval intents."""
        retrieval_intent_mapping = {}

        for example in training_data.intent_examples:
            retrieval_intent_mapping[
                example.get(RESPONSE)
            ] = f"{example.get(INTENT)}/{example.get(RESPONSE_KEY_ATTRIBUTE)}"

        return retrieval_intent_mapping

    @staticmethod
    def _set_message_property(
        message: Message, prediction_dict: Dict[Text, Any], selector_key: Text
    ) -> None:
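        # Resulting message structure (a sketch; the prediction_dict payload
        # itself is built in `process` below):
        # message.get(RESPONSE_SELECTOR_PROPERTY_NAME)
        #     == {..., selector_key: prediction_dict}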
        message_selector_properties = message.get(RESPONSE_SELECTOR_PROPERTY_NAME, {})
        message_selector_properties[selector_key] = prediction_dict
        message.set(
            RESPONSE_SELECTOR_PROPERTY_NAME,
            message_selector_properties,
            add_to_output=True,
        )

    def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
        """Prepares data for training.

        Performs sanity checks on training data, extracts encodings for labels.
        """
        if self.retrieval_intent:
            training_data = training_data.filter_by_intent(self.retrieval_intent)
        else:
            # retrieval intent was left to its default value
            logger.info(
                "Retrieval intent parameter was left to its default value. This "
                "response selector will be trained on training examples combining "
                "all retrieval intents."
            )

        label_id_index_mapping = self._label_id_index_mapping(
            training_data, attribute=RESPONSE
        )

        self.retrieval_intent_mapping = self._create_retrieval_intent_mapping(
            training_data
        )

        if not label_id_index_mapping:
            # no labels are present to train
            return RasaModelData()

        self.index_label_id_mapping = self._invert_mapping(label_id_index_mapping)

        self._label_data = self._create_label_data(
            training_data, label_id_index_mapping, attribute=RESPONSE
        )

        model_data = self._create_model_data(
            training_data.intent_examples,
            label_id_index_mapping,
            label_attribute=RESPONSE,
        )

        self._check_input_dimension_consistency(model_data)

        return model_data

    def process(self, message: Message, **kwargs: Any) -> None:
        """Return the most likely response and its similarity to the input."""
        out = self._predict(message)
        label, label_ranking = self._predict_label(out)
        retrieval_intent_name = self.retrieval_intent_mapping.get(label.get("name"))

        selector_key = (
            self.retrieval_intent
            if self.retrieval_intent
            else DEFAULT_OPEN_UTTERANCE_TYPE
        )

        logger.debug(
            f"Adding following selector key to message property: {selector_key}"
        )

        prediction_dict = {
            "response": label,
            "ranking": label_ranking,
            "full_retrieval_intent": retrieval_intent_name,
        }

        self._set_message_property(message, prediction_dict, selector_key)

    def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
        """Persist this model into the passed directory.

        Return the metadata necessary to load the model again.
        """
        if self.model is None:
            return {"file": None}

        super().persist(file_name, model_dir)

        model_dir = Path(model_dir)

        io_utils.json_pickle(
            model_dir / f"{file_name}.retrieval_intent_mapping.pkl",
            self.retrieval_intent_mapping,
        )
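        # Note: the mapping file is written next to the files produced by
        # DIETClassifier.persist above, under the same file_name prefix.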

        return {"file": file_name}

    @classmethod
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Text = None,
        model_metadata: Metadata = None,
        cached_component: Optional["ResponseSelector"] = None,
        **kwargs: Any,
    ) -> "ResponseSelector":
        """Loads the trained model from the provided directory."""
        model = super().load(
            meta, model_dir, model_metadata, cached_component, **kwargs
        )
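        # `super().load` falls back to a freshly constructed, untrained
        # component when no persisted model is found; in that case there is
        # no mapping file to restore (our reading of the equality check below).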
        if model == cls(component_config=meta):
            model.retrieval_intent_mapping = {}
            return model  # pytype: disable=bad-return-type

        file_name = meta.get("file")
        model_dir = Path(model_dir)

        retrieval_intent_mapping = io_utils.json_unpickle(
            model_dir / f"{file_name}.retrieval_intent_mapping.pkl"
        )
        model.retrieval_intent_mapping = retrieval_intent_mapping

        return model  # pytype: disable=bad-return-type


class DIET2DIET(DIET):
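    """Two DIET encoders trained end to end: one embeds the user text, the
    other the candidate responses, into a shared space (a summary inferred
    from the methods below, not an official description).
    """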

    def _check_data(self) -> None:
        if TEXT_FEATURES not in self.data_signature:
            raise InvalidConfigError(
                f"No text features specified. "
                f"Cannot train '{self.__class__.__name__}' model."
            )
        if LABEL_FEATURES not in self.data_signature:
            raise InvalidConfigError(
                f"No label features specified. "
                f"Cannot train '{self.__class__.__name__}' model."
            )
        if (
            self.config[SHARE_HIDDEN_LAYERS]
            and self.data_signature[TEXT_FEATURES]
            != self.data_signature[LABEL_FEATURES]
        ):
            raise ValueError(
                "If hidden layer weights are shared, data signatures "
                "for text_features and label_features must coincide."
            )

    def _create_metrics(self) -> None:
        # self.metrics preserves order
        # output losses first
        self.mask_loss = tf.keras.metrics.Mean(name="m_loss")
        self.response_loss = tf.keras.metrics.Mean(name="r_loss")
        # output accuracies second
        self.mask_acc = tf.keras.metrics.Mean(name="m_acc")
        self.response_acc = tf.keras.metrics.Mean(name="r_acc")

    def _update_metrics_to_log(self) -> None:
        if self.config[MASKED_LM]:
            self.metrics_to_log += ["m_loss", "m_acc"]
        self.metrics_to_log += ["r_loss", "r_acc"]

    def _prepare_layers(self) -> None:
        self.text_name = TEXT
        self.label_name = TEXT if self.config[SHARE_HIDDEN_LAYERS] else LABEL
        self._prepare_sequence_layers(self.text_name)
        self._prepare_sequence_layers(self.label_name)
        if self.config[MASKED_LM]:
            self._prepare_mask_lm_layers(self.text_name)
        self._prepare_label_classification_layers()

    def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]:
        all_label_ids = self.tf_label_data[LABEL_IDS][0]

        sequence_lengths_label = tf.cast(
            self.tf_label_data[LABEL_SEQ_LENGTH][0], dtype=tf.int32
        )
        mask_label = self.compute_mask(sequence_lengths_label)

        label_transformed, _, _, _ = self._create_sequence(
            self.tf_label_data[LABEL_FEATURES], mask_label, self.label_name
        )
        cls_label = self._last_token(label_transformed, sequence_lengths_label)

        all_labels_embed = self._tf_layers[f"embed.{LABEL}"](cls_label)

        return all_label_ids, all_labels_embed

    def batch_loss(
        self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]]
    ) -> tf.Tensor:
        tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature)

        sequence_lengths_text = tf.cast(
            tf_batch_data[TEXT_SEQ_LENGTH][0], dtype=tf.int32
        )
        mask_text = self.compute_mask(sequence_lengths_text)

        (
            text_transformed,
            text_in,
            text_seq_ids,
            lm_mask_bool_text,
        ) = self._create_sequence(
            tf_batch_data[TEXT_FEATURES],
            mask_text,
            self.text_name,
            self.config[MASKED_LM],
            sequence_ids=True,
        )
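
        # The label (response) side runs through its own encoder, or the
        # shared one when SHARE_HIDDEN_LAYERS is set; the masked-LM arguments
        # above apply only to the text side.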
        sequence_lengths_label = tf.cast(
            tf_batch_data[LABEL_SEQ_LENGTH][0], dtype=tf.int32
        )
        mask_label = self.compute_mask(sequence_lengths_label)

        label_transformed, _, _, _ = self._create_sequence(
            tf_batch_data[LABEL_FEATURES], mask_label, self.label_name
        )

        losses = []

        if self.config[MASKED_LM]:
            loss, acc = self._mask_loss(
                text_transformed,
                text_in,
                text_seq_ids,
                lm_mask_bool_text,
                self.text_name,
            )
            self.mask_loss.update_state(loss)
            self.mask_acc.update_state(acc)
            losses.append(loss)

        # get _cls_ vector for label classification
        cls_text = self._last_token(text_transformed, sequence_lengths_text)
        cls_label = self._last_token(label_transformed, sequence_lengths_label)
        label_ids = tf_batch_data[LABEL_IDS][0]

        loss, acc = self._calculate_label_loss(cls_text, cls_label, label_ids)
        self.response_loss.update_state(loss)
        self.response_acc.update_state(acc)
        losses.append(loss)

        return tf.math.add_n(losses)

    def batch_predict(
        self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]]
    ) -> Dict[Text, tf.Tensor]:
        tf_batch_data = self.batch_to_model_data_format(
            batch_in, self.predict_data_signature
        )

        sequence_lengths_text = tf.cast(
            tf_batch_data[TEXT_SEQ_LENGTH][0], dtype=tf.int32
        )
        mask_text = self.compute_mask(sequence_lengths_text)

        text_transformed, _, _, _ = self._create_sequence(
            tf_batch_data[TEXT_FEATURES], mask_text, self.text_name
        )

        out = {}
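
        # Candidate-label embeddings depend only on the fixed label data, not
        # on the incoming batch, so they are computed once and cached.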
        if self.all_labels_embed is None:
            _, self.all_labels_embed = self._create_all_labels()

        # get _cls_ vector for intent classification
        cls = self._last_token(text_transformed, sequence_lengths_text)
        cls_embed = self._tf_layers[f"embed.{TEXT}"](cls)

        sim_all = self._tf_layers[f"loss.{LABEL}"].sim(
            cls_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :]
        )
        scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim(
            sim_all, self.config[SIMILARITY_TYPE]
        )
        out["i_scores"] = scores

        return out