-
-
Notifications
You must be signed in to change notification settings - Fork 291
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add Regex Find node * revert text input change * Formatting
- Loading branch information
1 parent
87363dd
commit a35163c
Showing
5 changed files
with
217 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Protocol | ||
|
||
from chainner_ext import MatchGroup, RegexMatch, RustRegex | ||
|
||
|
||
class Range(Protocol):
    """Structural type for any object exposing integer ``start``/``end`` offsets.

    Any object with these two read-only attributes satisfies the protocol;
    no inheritance from this class is required.
    """

    @property
    def start(self) -> int:
        """Offset at which the range begins."""
        ...

    @property
    def end(self) -> int:
        """Offset at which the range ends."""
        ...
|
||
|
||
def get_range_text(text: str, range: Range) -> str:
    """Return the slice of ``text`` delimited by ``range``.

    Args:
        text: The string to slice.
        range: Any object with ``start`` and ``end`` attributes; they are
            used directly as the lower and upper bounds of a Python slice.
            (The parameter name shadows the builtin ``range`` inside this
            function only; it is kept for caller compatibility.)

    Returns:
        ``text[range.start:range.end]``.
    """
    begin, stop = range.start, range.end
    return text[begin:stop]
|
||
|
||
def match_to_replacements_dict(
    regex: RustRegex, match: RegexMatch, text: str
) -> dict[str, str]:
    """Build a name-to-text mapping for every capture group of ``match``.

    The result holds one entry per group index from ``0`` through
    ``regex.groups`` (keyed by the index as a string), plus one entry per
    name in ``regex.groupindex``.

    Args:
        regex: The compiled regex; its ``groups`` and ``groupindex``
            attributes are read.
        match: The match whose groups are looked up via ``match.get(i)``.
        text: The text the match was produced from; group text is sliced
            out of it.

    Returns:
        A dict mapping group indices (as strings) and group names to the
        text of that group, or ``""`` when ``match.get`` returns ``None``.
    """

    def get_group_text(group: MatchGroup | None) -> str:
        # ``match.get`` may yield None (presumably for a group that did not
        # take part in the match); such groups map to the empty string.
        if group is None:
            return ""
        return get_range_text(text, group)

    # Numbered groups first: "0", "1", ..., str(regex.groups).
    replacements: dict[str, str] = {
        str(index): get_group_text(match.get(index))
        for index in range(regex.groups + 1)
    }
    # Named groups are added afterwards under their names.
    replacements.update(
        (name, get_group_text(match.get(index)))
        for name, index in regex.groupindex.items()
    )

    return replacements
78 changes: 78 additions & 0 deletions
78
backend/src/packages/chaiNNer_standard/utility/text/regex_find.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from __future__ import annotations | ||
|
||
from enum import Enum | ||
|
||
from chainner_ext import RustRegex | ||
|
||
from nodes.groups import if_enum_group | ||
from nodes.impl.rust_regex import get_range_text, match_to_replacements_dict | ||
from nodes.properties.inputs import EnumInput, TextInput | ||
from nodes.properties.outputs import TextOutput | ||
from nodes.utils.replacement import ReplacementString | ||
|
||
from .. import text_group | ||
|
||
|
||
class OutputMode(Enum):
    """Selects what the Regex Find node outputs for a match."""

    # Output the text of the whole match.
    FULL_MATCH = 0
    # Output a user-supplied pattern filled in from the match.
    PATTERN = 1
|
||
|
||
@text_group.register(
    schema_id="chainner:utility:regex_find",
    name="Regex Find",
    description=[
        "Find some text matching a given regex.",
        "This node has 2 modes for output: full match and pattern.",
        "- **Full Match:** return the full match. E.g. for the regex `\\d+` and the text `My two cats caught 32 mice in 14 days`, the output will be `32`."
        "\n- **Pattern:** using the same pattern syntax as in other nodes, return a formatted pattern of the match. E.g. for the regex `(\\w+) is (\\w+)`, the pattern is `{1}={2}`, and the text `My name is Jane.`, the output will be `name=Jane`.",
    ],
    icon="MdTextFields",
    inputs=[
        TextInput("Text"),
        TextInput("Regex", placeholder=r'E.g. "\b\w+\b"'),
        EnumInput(
            OutputMode,
            label="Output",
            default=OutputMode.FULL_MATCH,
            label_style="inline",
        ).with_id(2),
        # The pattern input is wrapped in a group conditioned on input 2
        # being OutputMode.PATTERN.
        if_enum_group(2, OutputMode.PATTERN)(
            TextInput("Output Pattern", default="Found {0}")
        ),
    ],
    outputs=[
        TextOutput(
            "Text",
            output_type="""
                let pattern = match Input2 {
                    OutputMode::FullMatch => "{0}",
                    OutputMode::Pattern => Input3,
                };
                regexFind(Input0, Input1, pattern)
            """,
        ).with_never_reason(
            "Either the regex pattern or the replacement pattern is invalid"
        ),
    ],
    see_also=["chainner:utility:text_replace"],
)
def regex_find_node(
    text: str,
    regex_pattern: str,
    output: OutputMode,
    output_pattern: str,
) -> str:
    """Search ``text`` with ``regex_pattern`` and return the match as text.

    Args:
        text: The text to search.
        regex_pattern: The regex source handed to ``RustRegex``.
        output: ``FULL_MATCH`` returns the matched text itself; ``PATTERN``
            returns ``output_pattern`` filled in from the match's groups.
        output_pattern: Replacement pattern; only read in ``PATTERN`` mode.

    Returns:
        The matched text or the formatted pattern, depending on ``output``.

    Raises:
        RuntimeError: If ``regex_pattern`` does not match anywhere in ``text``.
        ValueError: If ``output`` is not a known ``OutputMode`` member.
    """
    # NOTE(review): constructing RustRegex presumably raises on an invalid
    # pattern; that error is not handled here -- confirm against chainner_ext.
    r = RustRegex(regex_pattern)
    m = r.search(text)
    if m is None:
        raise RuntimeError(
            f"No match found. Unable to find the pattern '{regex_pattern}' in the text."
        )

    if output == OutputMode.FULL_MATCH:
        return get_range_text(text, m)

    if output == OutputMode.PATTERN:
        replacements = match_to_replacements_dict(r, m, text)
        replacement = ReplacementString(output_pattern)
        return replacement.replace(replacements)

    # Fail loudly instead of implicitly returning None from a ``-> str``
    # function if a mode is ever added without a matching branch here.
    raise ValueError(f"Unsupported output mode: {output!r}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters