From 6cb7ed2b259f597d558fe2e064ae98862f011561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20F=C3=A4rber?= <01mf02@gmail.com> Date: Wed, 13 Mar 2024 10:35:32 +0100 Subject: [PATCH 1/3] Implement string splitting via division. --- jaq-core/src/lib.rs | 16 ---------------- jaq-core/tests/tests.rs | 10 ---------- jaq-interpret/src/val.rs | 15 +++++++++++++++ jaq-interpret/tests/tests.rs | 5 +++++ jaq-std/src/std.jq | 3 +++ 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/jaq-core/src/lib.rs b/jaq-core/src/lib.rs index be140b5e0..d57fc4934 100644 --- a/jaq-core/src/lib.rs +++ b/jaq-core/src/lib.rs @@ -167,18 +167,6 @@ fn as_codepoint(v: &Val) -> Result { char::from_u32(u).ok_or_else(|| Error::str(format_args!("cannot use {u} as character"))) } -/// Split a string by a given separator string. -fn split(s: &str, sep: &str) -> Vec { - if sep.is_empty() { - // Rust's `split` function with an empty separator ("") - // yields an empty string as first and last result - // to prevent this, we are using `chars` instead - s.chars().map(|s| Val::str(s.to_string())).collect() - } else { - s.split(sep).map(|s| Val::str(s.to_string())).collect() - } -} - /// This implements a ~10x faster version of: /// ~~~ text /// def range($from; $to; $by): $from | @@ -340,10 +328,6 @@ const CORE_RUN: &[(&str, usize, RunPtr)] = &[ let f = move |v| indices(&cv.1, &v?).map(|idxs| Val::arr(idxs.map(to_int).collect())); Box::new(vals.map(f)) }), - ("split", 1, |args, cv| { - let seps = args.get(0).run(cv.clone()); - Box::new(seps.map(move |sep| Ok(Val::arr(split(cv.1.as_str()?, sep?.as_str()?))))) - }), ("first", 1, |args, cv| Box::new(args.get(0).run(cv).take(1))), ("limit", 2, |args, cv| { let n = args.get(0).run(cv.clone()).map(|n| n?.as_int()); diff --git a/jaq-core/tests/tests.rs b/jaq-core/tests/tests.rs index c3630f390..ba41b7fd5 100644 --- a/jaq-core/tests/tests.rs +++ b/jaq-core/tests/tests.rs @@ -307,16 +307,6 @@ fn round() { fail(json!({}), "round", err(json!({}))); } -#[test] -fn split() { - give(json!("aöß"), r#"split("")"#, json!(["a", "ö", "ß"])); - give( - json!("abcabcdab"), - r#"split("ab")"#, - json!(["", "c", "cd", ""]), - ); -} - #[test] fn startswith() { give(json!("foobar"), r#"startswith("")"#, json!(true)); diff --git a/jaq-interpret/src/val.rs b/jaq-interpret/src/val.rs index b34aa74fc..bc233bd6f 100644 --- a/jaq-interpret/src/val.rs +++ b/jaq-interpret/src/val.rs @@ -415,6 +415,20 @@ impl core::ops::Mul for Val { } } +/// Split a string by a given separator string. +fn split<'a>(s: &'a str, sep: &'a str) -> Box + 'a> { + if s.is_empty() { + Box::new(core::iter::empty()) + } else if sep.is_empty() { + // Rust's `split` function with an empty separator ("") + // yields an empty string as first and last result + // to prevent this, we are using `chars` instead + Box::new(s.chars().map(|s| s.to_string())) + } else { + Box::new(s.split(sep).map(|s| s.to_string())) + } +} + impl core::ops::Div for Val { type Output = ValR; fn div(self, rhs: Self) -> Self::Output { @@ -426,6 +440,7 @@ impl core::ops::Div for Val { (Float(x), Float(y)) => Ok(Float(x / y)), (Num(n), r) => Self::from_dec_str(&n) / r, (l, Num(n)) => l / Self::from_dec_str(&n), + (Str(x), Str(y)) => Ok(Val::arr(split(&x, &y).map(Val::str).collect())), (l, r) => Err(Error::MathOp(l, MathOp::Div, r)), } } diff --git a/jaq-interpret/tests/tests.rs b/jaq-interpret/tests/tests.rs index aafeb8738..ae7773b69 100644 --- a/jaq-interpret/tests/tests.rs +++ b/jaq-interpret/tests/tests.rs @@ -63,6 +63,11 @@ fn mul() { ); } +yields!(div_str, r#""abcabcdab" / "ab""#, ["", "c", "cd", ""]); +yields!(div_str_empty, r#""" / """#, json!([])); +yields!(div_str_empty_str, r#""" / "ab""#, json!([])); +yields!(div_str_empty_sep, r#""aöß" / """#, ["a", "ö", "ß"]); + #[test] fn logic() { let tf = json!([true, false]); diff --git a/jaq-std/src/std.jq b/jaq-std/src/std.jq index f5d4be6b3..2076a040e 100644 --- a/jaq-std/src/std.jq +++ b/jaq-std/src/std.jq @@ -135,6 +135,9 @@ def scan(re; flags): matches(re; flags)[] | .[0].string; def match(re; flags): matches(re; flags)[] | .[0] + { captures: .[1:] }; def capture(re; flags): matches(re; flags)[] | capture_of_match; +def split($sep): + if isstring and ($sep | isstring) then . / $sep + else error("split input and separator must be strings") end; def split (re; flags): split_(re; flags + "g"); def splits(re; flags): split(re; flags)[]; From 56b35c7ad127bc9388d5b4b47406e1df60512f30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20F=C3=A4rber?= <01mf02@gmail.com> Date: Wed, 13 Mar 2024 10:55:35 +0100 Subject: [PATCH 2/3] Update jaq edition to 2021. --- jaq/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jaq/Cargo.toml b/jaq/Cargo.toml index fe49d9b1e..73a5ae723 100644 --- a/jaq/Cargo.toml +++ b/jaq/Cargo.toml @@ -2,7 +2,7 @@ name = "jaq" version = "1.3.0" authors = ["Michael Färber "] -edition = "2018" +edition = "2021" license = "MIT" readme = "../README.md" description = "Just another JSON query tool" From d20cb0864bfe92eb03007df371436ac312551d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20F=C3=A4rber?= <01mf02@gmail.com> Date: Wed, 13 Mar 2024 10:59:37 +0100 Subject: [PATCH 3/3] Upgrade base64. --- Cargo.lock | 4 ++-- jaq-core/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b7b3625e..5d87b9828 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,9 +53,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.5" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" [[package]] name = "bincode" diff --git a/jaq-core/Cargo.toml b/jaq-core/Cargo.toml index 82767df96..915437c21 100644 --- a/jaq-core/Cargo.toml +++ b/jaq-core/Cargo.toml @@ -25,7 +25,7 @@ regex = { version = "1.9", optional = true } log = { version = "0.4.17", optional = true } libm = { version = "0.2.7", optional = true } aho-corasick = { version = "1.0", optional = true } -base64 = { version = "0.21.2", optional = true } +base64 = { version = "0.22", optional = true } urlencoding = { version = "2.1.3", optional = true } [dev-dependencies]