19
19
# Names of the named capture groups in ENTITY_REGEX, used to read the
# corresponding parts out of a match via `match.groupdict()`.
GROUP_ENTITY_DICT = "entity_dict"
GROUP_ENTITY_TEXT = "entity_text"
# Group index 0 denotes the entire matched annotation in Python's `re` API.
GROUP_COMPLETE_MATCH = 0
# Group holding the raw content of a `[...]` list of entity dicts.
GROUP_ENTITY_DICT_LIST = "list_entity_dicts"
22
23
23
# regex for: `[entity_text]((entity_type(:entity_synonym)?)|{entity_dict}|[list_entity_dicts])`  # noqa: E501, W505
#
# After the bracketed entity text, exactly one of three annotation forms follows:
#   * `(entity_type)` or `(entity_type:entity_synonym)` -> groups `entity` / `value`
#   * `{entity_dict}`                                   -> group `entity_dict`
#   * `[list_entity_dicts]`                             -> group `list_entity_dicts`
# The pattern ends directly at the closing bracket/brace/parenthesis; it must
# not require any trailing whitespace, otherwise an annotation at the very end
# of an example would not be recognised.
ENTITY_REGEX = re.compile(
    r"\[(?P<entity_text>[^\]]+?)\](\((?P<entity>[^:)]+?)(?:\:(?P<value>[^)]+))?\)|\{(?P<entity_dict>[^}]+?)\}|\[(?P<list_entity_dicts>.*?)\])"  # noqa: E501, W505
)
27
28
29
# Matches one `{...}` entity dict; the content between the braces is captured
# in the `entity_dict` group. Both braces are escaped for consistency with
# ENTITY_REGEX (an unescaped `{` here is treated as a literal anyway).
SINGLE_ENTITY_DICT = re.compile(r"\{(?P<entity_dict>[^}]+?)\}")
30
+
28
31
29
32
class EntityAttributes (NamedTuple ):
30
33
"""Attributes of an entity defined in markdown data."""
@@ -50,21 +53,47 @@ def find_entities_in_training_example(example: Text) -> List[Dict[Text, Any]]:
50
53
offset = 0
51
54
52
55
for match in re .finditer (ENTITY_REGEX , example ):
53
- entity_attributes = extract_entity_attributes (match )
54
-
55
- start_index = match .start () - offset
56
- end_index = start_index + len (entity_attributes .text )
57
- offset += len (match .group (0 )) - len (entity_attributes .text )
58
-
59
- entity = rasa .shared .nlu .training_data .util .build_entity (
60
- start_index ,
61
- end_index ,
62
- entity_attributes .value ,
63
- entity_attributes .type ,
64
- entity_attributes .role ,
65
- entity_attributes .group ,
66
- )
67
- entities .append (entity )
56
+ if match .groupdict ()[GROUP_ENTITY_DICT ] or match .groupdict ()[GROUP_ENTITY_TYPE ]:
57
+ entity_attributes = extract_entity_attributes (match )
58
+
59
+ start_index = match .start () - offset
60
+ end_index = start_index + len (entity_attributes .text )
61
+ offset += len (match .group (0 )) - len (entity_attributes .text )
62
+
63
+ entity = rasa .shared .nlu .training_data .util .build_entity (
64
+ start_index ,
65
+ end_index ,
66
+ entity_attributes .value ,
67
+ entity_attributes .type ,
68
+ entity_attributes .role ,
69
+ entity_attributes .group ,
70
+ )
71
+ entities .append (entity )
72
+ else :
73
+ entity_text = match .groupdict ()[GROUP_ENTITY_TEXT ]
74
+ # iterate over the list
75
+
76
+ start_index = match .start () - offset
77
+ end_index = start_index + len (entity_text )
78
+ offset += len (match .group (0 )) - len (entity_text )
79
+
80
+ for match_inner in re .finditer (
81
+ SINGLE_ENTITY_DICT , match .groupdict ()[GROUP_ENTITY_DICT_LIST ]
82
+ ):
83
+
84
+ entity_attributes = extract_entity_attributes_from_dict (
85
+ entity_text = entity_text , match = match_inner
86
+ )
87
+
88
+ entity = rasa .shared .nlu .training_data .util .build_entity (
89
+ start_index ,
90
+ end_index ,
91
+ entity_attributes .value ,
92
+ entity_attributes .type ,
93
+ entity_attributes .role ,
94
+ entity_attributes .group ,
95
+ )
96
+ entities .append (entity )
68
97
69
98
return entities
70
99
0 commit comments