Skip to content

Commit

Permalink
Add Regex Find node (#2855)
Browse files Browse the repository at this point in the history
* Add Regex Find node

* revert text input change

* Formatting
  • Loading branch information
RunDevelopment authored May 17, 2024
1 parent 87363dd commit a35163c
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 38 deletions.
33 changes: 33 additions & 0 deletions backend/src/nodes/impl/rust_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

from typing import Protocol

from chainner_ext import MatchGroup, RegexMatch, RustRegex


class Range(Protocol):
    """Structural type for any object exposing integer `start` and `end` offsets.

    `get_range_text` uses the two offsets as the lower and upper bound of a
    string slice.
    """

    @property
    def start(self) -> int:
        """Offset used as the lower slice bound."""
        ...

    @property
    def end(self) -> int:
        """Offset used as the upper slice bound."""
        ...


def get_range_text(text: str, range: Range) -> str:
    """Return the part of `text` between `range.start` and `range.end`.

    The offsets are applied as an ordinary Python slice, so the character at
    `range.end` is not included.
    """
    bounds = slice(range.start, range.end)
    return text[bounds]


def match_to_replacements_dict(
    regex: RustRegex, match: RegexMatch, text: str
) -> dict[str, str]:
    """Build the replacement mapping for a single regex match.

    The result contains one entry per group index from `0` to `regex.groups`
    (keyed by the index as a string), followed by one entry per name in
    `regex.groupindex`. A group for which `match.get` returns `None` maps to
    the empty string.
    """

    def text_of(index: int) -> str:
        # `match.get` yields None for a group without a value.
        captured: MatchGroup | None = match.get(index)
        return "" if captured is None else get_range_text(text, captured)

    by_index = {str(index): text_of(index) for index in range(regex.groups + 1)}
    by_name = {name: text_of(index) for name, index in regex.groupindex.items()}

    # Named entries are merged last, exactly like the second loop would do.
    return {**by_index, **by_name}
78 changes: 78 additions & 0 deletions backend/src/packages/chaiNNer_standard/utility/text/regex_find.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from enum import Enum

from chainner_ext import RustRegex

from nodes.groups import if_enum_group
from nodes.impl.rust_regex import get_range_text, match_to_replacements_dict
from nodes.properties.inputs import EnumInput, TextInput
from nodes.properties.outputs import TextOutput
from nodes.utils.replacement import ReplacementString

from .. import text_group


class OutputMode(Enum):
    """Selects what the Regex Find node returns for a match."""

    # Return the text of the full match.
    FULL_MATCH = 0
    # Return the user-supplied output pattern formatted with the match's groups.
    PATTERN = 1


@text_group.register(
    schema_id="chainner:utility:regex_find",
    name="Regex Find",
    description=[
        "Find some text matching a given regex.",
        "This node has 2 modes for output: full match and pattern.",
        "- **Full Match:** return the full match. E.g. for the regex `\\d+` and the text `My two cats caught 32 mice in 14 days`, the output will be `32`."
        "\n- **Pattern:** using the same pattern syntax as in other nodes, return a formatted pattern of the match. E.g. for the regex `(\\w+) is (\\w+)`, the pattern is `{1}={2}`, and the text `My name is Jane.`, the output will be `name=Jane`.",
    ],
    icon="MdTextFields",
    inputs=[
        TextInput("Text"),
        TextInput("Regex", placeholder=r'E.g. "\b\w+\b"'),
        EnumInput(
            OutputMode,
            label="Output",
            default=OutputMode.FULL_MATCH,
            label_style="inline",
        ).with_id(2),
        # The output pattern input is only part of the group for OutputMode.PATTERN.
        if_enum_group(2, OutputMode.PATTERN)(
            TextInput("Output Pattern", default="Found {0}")
        ),
    ],
    outputs=[
        TextOutput(
            "Text",
            # The type expression below is kept verbatim (flush-left) so that the
            # string content stays unchanged.
            output_type="""
let pattern = match Input2 {
OutputMode::FullMatch => "{0}",
OutputMode::Pattern => Input3,
};
regexFind(Input0, Input1, pattern)
""",
        ).with_never_reason(
            "Either the regex pattern or the replacement pattern is invalid"
        ),
    ],
    see_also=["chainner:utility:text_replace"],
)
def regex_find_node(
    text: str,
    regex_pattern: str,
    output: OutputMode,
    output_pattern: str,
) -> str:
    """Search `text` for `regex_pattern` and return the match in the chosen form.

    Args:
        text: The text to search.
        regex_pattern: The pattern handed to `RustRegex`.
        output: `FULL_MATCH` returns the matched text itself; `PATTERN` returns
            `output_pattern` formatted with the match's groups.
        output_pattern: The replacement pattern; only read in `PATTERN` mode.

    Raises:
        RuntimeError: If the regex does not match anywhere in `text`.
    """
    compiled = RustRegex(regex_pattern)
    found = compiled.search(text)
    if found is None:
        raise RuntimeError(
            f"No match found. Unable to find the pattern '{regex_pattern}' in the text."
        )

    if output == OutputMode.FULL_MATCH:
        return get_range_text(text, found)
    if output == OutputMode.PATTERN:
        # Collect the group texts before parsing the output pattern.
        group_texts = match_to_replacements_dict(compiled, found, text)
        formatter = ReplacementString(output_pattern)
        return formatter.replace(group_texts)
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from enum import Enum

from chainner_ext import MatchGroup, RustRegex
from chainner_ext import RustRegex

from nodes.impl.rust_regex import match_to_replacements_dict
from nodes.properties.inputs import EnumInput, TextInput
from nodes.properties.outputs import TextOutput
from nodes.utils.replacement import ReplacementString
Expand Down Expand Up @@ -68,24 +69,11 @@ def regex_replace_node(
if mode == ReplacementMode.REPLACE_FIRST:
matches = matches[:1]

def get_group_text(group: MatchGroup | None) -> str:
if group is not None:
return text[group.start : group.end]
else:
return ""

result = ""
last_end = 0
for match in matches:
result += text[last_end : match.start]

replacements: dict[str, str] = {}
for i in range(r.groups + 1):
replacements[str(i)] = get_group_text(match.get(i))
for name, i in r.groupindex.items():
replacements[name] = get_group_text(match.get(i))

result += replacement.replace(replacements)
result += replacement.replace(match_to_replacements_dict(r, match, text))
last_end = match.end

result += text[last_end:]
Expand Down
123 changes: 100 additions & 23 deletions src/common/types/chainner-builtin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,7 @@ export const formatTextPattern = (
return Intrinsic.concat(...concatArgs);
};

const regexReplaceImpl = (
text: string,
regexPattern: string,
replacementPattern: string,
count: number
): string => {
// parse and validate before doing actual work
const regex = new RRegex(regexPattern);
const replacement = new ReplacementString(replacementPattern);

const validateReplacementForRegex = (regex: RRegex, replacement: ReplacementString): void => {
// check replacement keys
const availableNames = new Set<string>([
...regex.captureNames().filter(isNotNullish),
Expand All @@ -181,7 +172,13 @@ const regexReplaceImpl = (
);
}
}

};
const regexReplaceImpl = (
text: string,
regex: RRegex,
replacement: ReplacementString,
count: number
): string => {
// do actual work
if (count === 0) {
return text;
Expand Down Expand Up @@ -220,19 +217,99 @@ export const regexReplace = wrapQuaternary<
NumberPrimitive,
StringPrimitive
>((text, regexPattern, replacementPattern, count) => {
if (
text.type === 'literal' &&
regexPattern.type === 'literal' &&
replacementPattern.type === 'literal' &&
count.type === 'literal'
) {
let regex;
if (regexPattern.type === 'literal') {
try {
const result = regexReplaceImpl(
text.value,
regexPattern.value,
replacementPattern.value,
count.value
);
regex = new RRegex(regexPattern.value);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

let replacement;
if (replacementPattern.type === 'literal') {
try {
replacement = new ReplacementString(replacementPattern.value);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

if (regex && replacement) {
try {
validateReplacementForRegex(regex, replacement);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

if (text.type === 'literal' && count.type === 'literal' && regex && replacement) {
try {
const result = regexReplaceImpl(text.value, regex, replacement, count.value);
return new StringLiteralType(result);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}
return StringType.instance;
});
/**
 * Finds the first match of `regex` in `text` and formats it with `replacement`.
 *
 * The replacement receives every entry of the captures array keyed by its index
 * (as a string) and every named capture keyed by its name. A capture entry without
 * a value is substituted with the empty string, which is what the Python backend
 * (`match_to_replacements_dict`) does for a group that `match.get` reports as `None`.
 *
 * @param text The text to search.
 * @param regex The compiled regex.
 * @param replacement The parsed output pattern.
 * @returns The formatted output pattern.
 * @throws Error if `regex` does not match anywhere in `text`.
 */
export const regexFindImpl = (
    text: string,
    regex: RRegex,
    replacement: ReplacementString
): string => {
    const captures = regex.captures(text);
    if (!captures) {
        throw new Error('No match found.');
    }

    // NOTE(review): assumes a group that did not participate in the match may show up
    // as a nullish entry - confirm against rregex. Without this guard such an entry
    // would raise a TypeError here instead of yielding '' like the backend does.
    const groupText = (group: { value: string } | null | undefined): string => group?.value ?? '';

    const replacements = new Map<string, string>();
    captures.get.forEach((group, index) => replacements.set(String(index), groupText(group)));
    for (const [name, group] of Object.entries(captures.name)) {
        replacements.set(name, groupText(group));
    }
    return replacement.replace(replacements);
};
export const regexFind = wrapTernary<
StringPrimitive,
StringPrimitive,
StringPrimitive,
StringPrimitive
>((text, regexPattern, replacementPattern) => {
let regex;
if (regexPattern.type === 'literal') {
try {
regex = new RRegex(regexPattern.value);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

let replacement;
if (replacementPattern.type === 'literal') {
try {
replacement = new ReplacementString(replacementPattern.value);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

if (regex && replacement) {
try {
validateReplacementForRegex(regex, replacement);
} catch (error) {
log.debug('regexReplaceImpl', error);
return NeverType.instance;
}
}

if (text.type === 'literal' && regex && replacement) {
try {
const result = regexFindImpl(text.value, regex, replacement);
return new StringLiteralType(result);
} catch (error) {
log.debug('regexReplaceImpl', error);
Expand Down
3 changes: 3 additions & 0 deletions src/common/types/chainner-scope.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
padEnd,
padStart,
parseColorJson,
regexFind,
regexReplace,
splitFilePath,
} from './chainner-builtin';
Expand Down Expand Up @@ -120,6 +121,7 @@ struct SplitFilePath {
intrinsic def formatPattern(pattern: string, ...args: string | null): string;
intrinsic def regexReplace(text: string, regex: string, replacement: string, count: uint | inf): string;
intrinsic def regexFind(text: string, regex: string, pattern: string): string;
intrinsic def padStart(text: string, width: uint, padding: string): string;
intrinsic def padEnd(text: string, width: uint, padding: string): string;
intrinsic def padCenter(text: string, width: uint, padding: string): string;
Expand All @@ -135,6 +137,7 @@ export const getChainnerScope = lazy((): Scope => {
const intrinsic: Record<string, (scope: Scope, ...args: NeverType[]) => Type> = {
formatPattern: makeScoped(formatTextPattern),
regexReplace: makeScoped(regexReplace),
regexFind: makeScoped(regexFind),
padStart: makeScoped(padStart),
padEnd: makeScoped(padEnd),
padCenter: makeScoped(padCenter),
Expand Down

0 comments on commit a35163c

Please sign in to comment.