From 6ae64448158d621bc527cd5a82ac6e80d3f0e2aa Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:15:17 -0500 Subject: [PATCH 1/5] Have `clean_whitespace` re-infer types --- pkg/bifs/strings.go | 3 ++- pkg/dsl/cst/builtin_function_manager.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/bifs/strings.go b/pkg/bifs/strings.go index cd68ee4808..e77de7c684 100644 --- a/pkg/bifs/strings.go +++ b/pkg/bifs/strings.go @@ -344,11 +344,12 @@ func BIF_capitalize(input1 *mlrval.Mlrval) *mlrval.Mlrval { // ---------------------------------------------------------------- func BIF_clean_whitespace(input1 *mlrval.Mlrval) *mlrval.Mlrval { - return BIF_strip( + mv := BIF_strip( BIF_collapse_whitespace_regexp( input1, _whitespace_regexp, ), ) + return mlrval.FromInferredType(mv.String()) } // ================================================================ diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index c55f9edd9c..09fc70db0c 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -444,7 +444,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib { name: "clean_whitespace", class: FUNC_CLASS_STRING, - help: "Same as collapse_whitespace and strip.", + help: "Same as collapse_whitespace and strip, followed by type inference.", unaryFunc: bifs.BIF_clean_whitespace, }, From f28fdebcc0caa5869f1fb1faed76d0334600f766 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:18:05 -0500 Subject: [PATCH 2/5] make dev output --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/reference-dsl-builtin-functions.md | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index c4a65ea40a..ae289a0978 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2312,7 +2312,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Ceiling: nearest integer at or above. 1mclean_whitespace0m - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index fe77c56722..9a7d6eb7e9 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2291,7 +2291,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Ceiling: nearest integer at or above. 1mclean_whitespace0m - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index f3b8efdef4..028baf9512 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1209,7 +1209,7 @@ capitalize (class=string #args=1) Convert string's first character to uppercase ### clean_whitespace
-clean_whitespace  (class=string #args=1) Same as collapse_whitespace and strip.
+clean_whitespace  (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference.
 
diff --git a/man/manpage.txt b/man/manpage.txt index fe77c56722..9a7d6eb7e9 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2291,7 +2291,7 @@ MILLER(1) MILLER(1) (class=math #args=1) Ceiling: nearest integer at or above. 1mclean_whitespace0m - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. 1mcollapse_whitespace0m (class=string #args=1) Strip repeated whitespace from string. diff --git a/man/mlr.1 b/man/mlr.1 index c6c5c540f2..c75bc72e07 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -3100,7 +3100,7 @@ Map example: apply({"a":1, "b":3, "c":5}, func(k,v) {return {toupper(k): v ** 2} .RS 0 .\} .nf - (class=string #args=1) Same as collapse_whitespace and strip. + (class=string #args=1) Same as collapse_whitespace and strip, followed by type inference. .fi .if n \{\ .RE From 12e14690c8f783a3bf9566bf1c9e66490d8c1157 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:23:40 -0500 Subject: [PATCH 3/5] unit-test files --- test/cases/dsl-clean-whitespace/0010/cmd | 1 + test/cases/dsl-clean-whitespace/0010/experr | 0 test/cases/dsl-clean-whitespace/0010/expout | 18 ++++++++++++++++++ test/cases/dsl-clean-whitespace/0010/input.csv | 3 +++ test/cases/dsl-clean-whitespace/0010/mlr | 2 ++ 5 files changed, 24 insertions(+) create mode 100644 test/cases/dsl-clean-whitespace/0010/cmd create mode 100644 test/cases/dsl-clean-whitespace/0010/experr create mode 100644 test/cases/dsl-clean-whitespace/0010/expout create mode 100644 test/cases/dsl-clean-whitespace/0010/input.csv create mode 100644 test/cases/dsl-clean-whitespace/0010/mlr diff --git a/test/cases/dsl-clean-whitespace/0010/cmd b/test/cases/dsl-clean-whitespace/0010/cmd new file mode 100644 index 0000000000..2fd915d023 --- /dev/null +++ b/test/cases/dsl-clean-whitespace/0010/cmd @@ -0,0 +1 @@ +mlr --icsv --ojson clean-whitespace then put -f ${CASEDIR}/mlr ${CASEDIR}/input.csv diff --git a/test/cases/dsl-clean-whitespace/0010/experr b/test/cases/dsl-clean-whitespace/0010/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/dsl-clean-whitespace/0010/expout b/test/cases/dsl-clean-whitespace/0010/expout new file mode 100644 index 0000000000..db3fe878d9 --- /dev/null +++ b/test/cases/dsl-clean-whitespace/0010/expout @@ -0,0 +1,18 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3, + "d": 4, + "e": 9, + "t": "int" +}, +{ + "a": 5, + "b": 6, + "c": 7, + "d": 8, + "e": 13, + "t": "int" +} +] diff --git a/test/cases/dsl-clean-whitespace/0010/input.csv b/test/cases/dsl-clean-whitespace/0010/input.csv new file mode 100644 index 0000000000..4320372396 --- /dev/null +++ b/test/cases/dsl-clean-whitespace/0010/input.csv @@ -0,0 +1,3 @@ +a, b, c, d +1, 2, 3, 4 +5, 6, 7, 8 diff --git a/test/cases/dsl-clean-whitespace/0010/mlr b/test/cases/dsl-clean-whitespace/0010/mlr new file mode 100644 index 0000000000..e51c30c8b6 --- /dev/null +++ b/test/cases/dsl-clean-whitespace/0010/mlr @@ -0,0 +1,2 @@ +$e = $d + 5; +$t = typeof($d) From 44cc5142136934753707834af9de77d33dff3256 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:29:03 -0500 Subject: [PATCH 4/5] drive-by typofix --- pkg/dsl/cst/builtin_function_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index 09fc70db0c..965c9529bf 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -355,7 +355,7 @@ used within subsequent DSL statements. See also "Regular expressions" at ` + lib { name: "strmatchx", class: FUNC_CLASS_STRING, - help: `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.", + help: `Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the ` + "`=~` operator. As well, while the `=~` operator limits matches to \\1 through \\9, an arbitrary number are supported here.", examples: []string{ `strmatchx("a", "abc") returns:`, ` {`, From 3a20831f86073666389a74ceec2a0a3b1502c4b0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jan 2024 18:31:28 -0500 Subject: [PATCH 5/5] make dev output --- docs/src/manpage.md | 2 +- docs/src/manpage.txt | 2 +- docs/src/reference-dsl-builtin-functions.md | 2 +- man/manpage.txt | 2 +- man/mlr.1 | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index ae289a0978..8d25329699 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -3011,7 +3011,7 @@ MILLER(1) MILLER(1) strmatch(12345, "34") is true 1mstrmatchx0m - (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. Examples: strmatchx("a", "abc") returns: { diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 9a7d6eb7e9..915a1b7274 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2990,7 +2990,7 @@ MILLER(1) MILLER(1) strmatch(12345, "34") is true 1mstrmatchx0m - (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. + (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here. Examples: strmatchx("a", "abc") returns: { diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 028baf9512..3a55821f3f 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -1364,7 +1364,7 @@ strmatch(12345, "34") is true ### strmatchx
-strmatchx  (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
+strmatchx  (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
 Examples:
 strmatchx("a", "abc") returns:
   {
diff --git a/man/manpage.txt b/man/manpage.txt
index 9a7d6eb7e9..915a1b7274 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -2990,7 +2990,7 @@ MILLER(1)                                                            MILLER(1)
        strmatch(12345, "34") is true
 
    1mstrmatchx0m
-        (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
+        (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \1, \2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \1 through \9, an arbitrary number are supported here.
        Examples:
        strmatchx("a", "abc") returns:
          {
diff --git a/man/mlr.1 b/man/mlr.1
index c75bc72e07..28940393c9 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -4675,7 +4675,7 @@ strmatch(12345, "34") is true
 .RS 0
 .\}
 .nf
- (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in constrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
+ (class=string #args=2) Extended information for whether the stringable first argument matches the regular-expression second argument. Regex captures are provided in the return-value map; \e1, \e2, etc. are not set, in contrast to the `=~` operator. As well, while the `=~` operator limits matches to \e1 through \e9, an arbitrary number are supported here.
 Examples:
 strmatchx("a", "abc") returns:
   {