From b4478750df1d9a5e73db1cf458e51f694584b9aa Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Tue, 11 May 2021 14:08:31 +0200 Subject: [PATCH 1/8] Update to rust 1.52 Signed-off-by: Heinz N. Gies --- .github/workflows/tests.yaml | 4 ++-- Dockerfile | 2 +- Dockerfile.learn | 2 +- rust-toolchain | 2 +- src/lib.rs | 4 ++-- tremor-cli/src/run.rs | 2 +- tremor-pipeline/src/lib.rs | 4 ++-- tremor-script/src/lib.rs | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index fb9e17e580..4017b46cb1 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -45,8 +45,8 @@ jobs: PROPTEST_CASES: 2500 RUSTFLAGS: -D warnings -C target-feature=+avx,+avx2,+sse4.2 with: - version: "0.16.0" - args: " --exclude-files target* tremor-cli tremor-api deprecated **/errors.rs --out Lcov --all" + version: "0.18.0-alpha3" + args: " --avoid-cfg-tarpaulin --exclude-files target* tremor-cli tremor-api **/errors.rs --out Lcov --all" - name: Coveralls uses: coverallsapp/github-action@master with: diff --git a/Dockerfile b/Dockerfile index 8532995df8..1e86aff8c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.51.0 as builder +FROM rust:1.52.1 as builder # Avoid warnings by switching to noninteractive ENV DEBIAN_FRONTEND=noninteractive diff --git a/Dockerfile.learn b/Dockerfile.learn index e7f32cfaaf..f4850a9f66 100644 --- a/Dockerfile.learn +++ b/Dockerfile.learn @@ -1,4 +1,4 @@ -FROM rust:1.51.0 as builder +FROM rust:1.52.1 as builder RUN cargo install --features=ssl websocat diff --git a/rust-toolchain b/rust-toolchain index ba0a719118..154cb93b2c 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.51.0 +1.52.1 diff --git a/src/lib.rs b/src/lib.rs index 8e725e9a36..f533dd46f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,10 +23,10 @@ clippy::unnecessary_unwrap, clippy::pedantic )] -// TODOthis is needed due to a false positive in clippy +// TODO this is needed due to a false positive in clippy // https://github.com/rust-lang/rust/issues/83125 // we will need this in 1.52.0 -// #![allow(proc_macro_back_compat)] +#![allow(proc_macro_back_compat)] #[macro_use] extern crate serde_derive; diff --git a/tremor-cli/src/run.rs b/tremor-cli/src/run.rs index 9e2ebd8dd3..1d3a1f6b0e 100644 --- a/tremor-cli/src/run.rs +++ b/tremor-cli/src/run.rs @@ -168,8 +168,8 @@ impl Egress { is_interactive, is_pretty, buffer, - postprocessor, codec, + postprocessor, }) } diff --git a/tremor-pipeline/src/lib.rs b/tremor-pipeline/src/lib.rs index 4e22d21dfe..d377abbb72 100644 --- a/tremor-pipeline/src/lib.rs +++ b/tremor-pipeline/src/lib.rs @@ -23,10 +23,10 @@ clippy::unnecessary_unwrap, clippy::pedantic )] -// TODOthis is needed due to a false positive in clippy +// TODO this is needed due to a false positive in clippy // https://github.com/rust-lang/rust/issues/83125 // we will need this in 1.52.0 -// #![allow(proc_macro_back_compat)] +#![allow(proc_macro_back_compat)] #[macro_use] extern crate serde_derive; diff --git a/tremor-script/src/lib.rs b/tremor-script/src/lib.rs index bf7d081684..55c6dfbe0f 100644 --- a/tremor-script/src/lib.rs +++ b/tremor-script/src/lib.rs @@ -25,7 +25,7 @@ // TODO this is needed due to a false positive in clippy // https://github.com/rust-lang/rust/issues/83125 // we will need this in 1.52.0 -// #![allow(proc_macro_back_compat)] +#![allow(proc_macro_back_compat)] /// The Tremor Script AST pub mod ast; From b74694e454e5f970cfab104c9351b076a90de3cd Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Tue, 11 May 2021 16:32:22 +0200 Subject: [PATCH 2/8] Some coverage recoverage Signed-off-by: Heinz N. Gies --- Dockerfile.native | 2 +- src/preprocessor.rs | 11 +- tremor-pipeline/src/op/qos/wal.rs | 64 +++++------ tremor-pipeline/src/query.rs | 136 +++++++++-------------- tremor-script/src/errors.rs | 27 ++++- tremor-script/src/grok.rs | 26 ++--- tremor-script/src/interpreter/expr.rs | 154 ++++++++++++++------------ tremor-script/src/registry.rs | 3 + tremor-script/src/std_lib/string.rs | 6 +- 9 files changed, 205 insertions(+), 224 deletions(-) diff --git a/Dockerfile.native b/Dockerfile.native index a30d7b9c14..f100f015b3 100644 --- a/Dockerfile.native +++ b/Dockerfile.native @@ -1,4 +1,4 @@ -FROM rust:1.51.0 as builder +FROM rust:1.52.1 as builder # Avoid warnings by switching to noninteractive ENV DEBIAN_FRONTEND=noninteractive diff --git a/src/preprocessor.rs b/src/preprocessor.rs index 2b3303f909..5ceaed7c89 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -502,7 +502,8 @@ mod test { } } - fn textual_length_pre_post(length in 1..100_usize) { + #[test] + fn textual_length_pre_post(length in 1..100_usize) { let data = vec![1_u8; length]; let mut pre_p = pre::TextualLength::default(); let mut post_p = post::TextualLength::default(); @@ -821,7 +822,7 @@ mod test { fn test_gzip() -> Result<()> { let int = "snot".as_bytes(); assert_simple_symmetric!(int, Gzip, "gzip"); - assert_decompress!(int, Lz4, "lz4"); + assert_decompress!(int, Gzip, "gzip"); Ok(()) } @@ -829,7 +830,7 @@ mod test { fn test_zlib() -> Result<()> { let int = "snot".as_bytes(); assert_simple_symmetric!(int, Zlib, "zlib"); - assert_decompress!(int, Lz4, "lz4"); + assert_decompress!(int, Zlib, "zlib"); Ok(()) } @@ -837,7 +838,7 @@ mod test { fn test_snappy() -> Result<()> { let int = "snot".as_bytes(); assert_simple_symmetric!(int, Snappy, "snap"); - assert_decompress!(int, Lz4, "lz4"); + assert_decompress!(int, Snappy, "snap"); Ok(()) } @@ -845,7 +846,7 @@ mod test { fn test_xz2() -> Result<()> { let int = "snot".as_bytes(); assert_simple_symmetric!(int, Xz2, "xz2"); - assert_decompress!(int, Lz4, "lz4"); + assert_decompress!(int, Xz2, "xz2"); Ok(()) } diff --git a/tremor-pipeline/src/op/qos/wal.rs b/tremor-pipeline/src/op/qos/wal.rs index 9f4536ac2f..f71d6204e4 100644 --- a/tremor-pipeline/src/op/qos/wal.rs +++ b/tremor-pipeline/src/op/qos/wal.rs @@ -47,14 +47,14 @@ impl AddAssign for Idx { impl Add for Idx { type Output = Idx; fn add(self, rhs: u64) -> Self::Output { - Idx::from(u64::from(self) + rhs) + Idx::from(u64::from(&self) + rhs) } } impl Add for Idx { type Output = Idx; fn add(self, rhs: usize) -> Self::Output { - Idx::from(u64::from(self) + rhs as u64) + Idx::from(u64::from(&self) + rhs as u64) } } @@ -65,20 +65,6 @@ impl From<&Idx> for u64 { } } -impl From<&mut Idx> for u64 { - fn from(i: &mut Idx) -> u64 { - let mut rdr = Cursor::new(&i.0); - rdr.read_u64::().unwrap_or(0) - } -} - -impl From for u64 { - fn from(i: Idx) -> u64 { - let mut rdr = Cursor::new(&i.0); - rdr.read_u64::().unwrap_or(0) - } -} - impl From for Idx { fn from(v: IVec) -> Self { let mut rdr = Cursor::new(v); @@ -98,7 +84,7 @@ impl Idx { self.0 = unsafe { mem::transmute(v.to_be()) }; } fn set_min(&mut self, v: u64) { - if v < u64::from(*self) { + if v < u64::from(&*self) { self.0 = unsafe { mem::transmute(v.to_be()) }; } } @@ -109,24 +95,12 @@ impl AsRef<[u8]> for Idx { } } -impl From for IVec { - fn from(i: Idx) -> Self { - IVec::from(&i.0) - } -} - impl From<&Idx> for IVec { fn from(i: &Idx) -> Self { IVec::from(&i.0) } } -impl From<&mut Idx> for IVec { - fn from(i: &mut Idx) -> Self { - IVec::from(&i.0) - } -} - #[derive(Debug, Clone, Deserialize, serde::Deserialize, serde::Serialize)] pub struct Config { /// Maximum number of events to read per tick/event when filling @@ -193,17 +167,13 @@ pub struct Wal { } op!(WalFactory(_uid, node) { - if let Some(map) = &node.config { - let config: Config = Config::new(map)?; - - if let (None, None) = (config.max_elements, config.max_bytes) { - Err(ErrorKind::BadOpConfig("WAL operator needs at least one of `max_elements` or `max_bytes` config entries.".to_owned()).into()) - } else { - Ok(Box::new(Wal::new(node.id.to_string(), config)?)) - } + let map = node.config.as_ref().ok_or_else(|| ErrorKind::MissingOpConfig(node.id.to_string()))?; + let config: Config = Config::new(&map)?; + if config.max_elements.or(config.max_bytes).is_none() { + Err(ErrorKind::BadOpConfig("WAL operator needs at least one of `max_elements` or `max_bytes` config entries.".to_string()).into()) } else { - Err(ErrorKind::MissingOpConfig(node.id.to_string()).into()) + Ok(Box::new(Wal::new(node.id.to_string(), config)?)) } }); @@ -372,7 +342,7 @@ impl Operator for Wal { insight.id.track(&e.id); } - let current_confirmed = self.confirmed.map(u64::from).unwrap_or_default(); + let current_confirmed = self.confirmed.map(|v| u64::from(&v)).unwrap_or_default(); if event_id < current_confirmed { warn!( "trying to fail a message({}) that was already confirmed({})", @@ -750,4 +720,20 @@ mod test { } Ok(()) } + + #[test] + fn from() -> Result<()> { + assert_eq!(42, u64::from(&(Idx::from(40u64) + 2u64))); + assert_eq!(42, u64::from(&(Idx::from(40u64) + 2usize))); + + Ok(()) + } + + #[test] + fn as_ref() -> Result<()> { + let i = Idx::from(42u64); + let s: &[u8] = i.as_ref(); + assert_eq!(&[0, 0, 0, 0, 0, 0, 0, 42u8][..], s); + Ok(()) + } } diff --git a/tremor-pipeline/src/query.rs b/tremor-pipeline/src/query.rs index 91c1538bd2..c8196ebfcc 100644 --- a/tremor-pipeline/src/query.rs +++ b/tremor-pipeline/src/query.rs @@ -218,15 +218,14 @@ impl Query { ..NodeConfig::default() }); nodes.insert(name.clone(), id); - let op = match pipe_graph.raw_nodes().get(id.index()) { - Some(node) => { + let op = pipe_graph + .raw_nodes() + .get(id.index()) + .ok_or_else(|| Error::from("Error finding freshly added node.")) + .and_then(|node| { node.weight - .to_op(idgen.next_id(), supported_operators, None, None, None)? - } - None => { - return Err(format!("Error finding node {:?} in constructed graph", id).into()) - } - }; + .to_op(idgen.next_id(), supported_operators, None, None, None) + })?; pipe_ops.insert(id, op); match node_kind { NodeKind::Input => { @@ -305,22 +304,19 @@ impl Query { ..NodeConfig::default() }); nodes.insert(name.clone(), id); - let op = match pipe_graph.raw_nodes().get(id.index()) { - Some(node) => node.weight.to_op( - idgen.next_id(), - supported_operators, - None, - None, - None, - )?, - None => { - return Err(format!( - "Error finding freshly added node {:?} in pipeline graph.", - id + let op = pipe_graph + .raw_nodes() + .get(id.index()) + .ok_or_else(|| Error::from("Error finding freshly added node.")) + .and_then(|node| { + node.weight.to_op( + idgen.next_id(), + supported_operators, + None, + None, + None, ) - .into()) - } - }; + })?; pipe_ops.insert(id, op); inputs.insert(name, id); } @@ -338,22 +334,20 @@ impl Query { ..NodeConfig::default() }); nodes.insert(name, id); - let op = match pipe_graph.raw_nodes().get(id.index()) { - Some(node) => node.weight.to_op( - idgen.next_id(), - supported_operators, - None, - None, - None, - )?, - None => { - return Err(format!( - "Error finding freshly added node {:?} in pipeline graph.", - id + let op = pipe_graph + .raw_nodes() + .get(id.index()) + .ok_or_else(|| Error::from("Error finding freshly added node.")) + .and_then(|node| { + node.weight.to_op( + idgen.next_id(), + supported_operators, + None, + None, + None, ) - .into()) - } - }; + })?; + pipe_ops.insert(id, op); outputs.push(id); } @@ -619,21 +613,18 @@ impl Query { let mut contraflow = Vec::new(); // Nodes that handle signals let mut signalflow = Vec::new(); - let mut i = 0; - for nx in pipe_graph.node_indices() { - if let Some(op) = pipe_ops.remove(&nx) { - i2pos.insert(nx, i); - if op.handles_contraflow() { - contraflow.push(i); - } - if op.handles_signal() { - signalflow.push(i); - } - graph.push(op); - i += 1; - } else { - return Err(format!("Invalid pipeline can't find node {:?}", &nx).into()); + for (i, nx) in pipe_graph.node_indices().enumerate() { + let op = pipe_ops + .remove(&nx) + .ok_or_else(|| format!("Invalid pipeline can't find node {:?}", &nx))?; + i2pos.insert(nx, i); + if op.handles_contraflow() { + contraflow.push(i); + } + if op.handles_signal() { + signalflow.push(i); } + graph.push(op); } // since contraflow is the reverse we need to reverse it. contraflow.reverse(); @@ -692,13 +683,9 @@ fn select( node: Option, windows: Option>, ) -> Result> { - let node = if let Some(node) = node { - node - } else { - return Err( - ErrorKind::MissingOpConfig("trickle operators require a statement".into()).into(), - ); - }; + let node = node.ok_or_else(|| { + ErrorKind::MissingOpConfig("trickle operators require a statement".into()) + })?; let select_type = match node.stmt.suffix() { tremor_script::ast::Stmt::Select(ref select) => select.complexity(), _ => { @@ -719,14 +706,9 @@ fn select( )?)), SelectType::Normal => { let groups = Dims::new(node.stmt.clone()); - let windows = if let Some(windows) = windows { - windows - } else { - return Err(ErrorKind::MissingOpConfig( - "select operators require a window mapping".into(), - ) - .into()); - }; + let windows = windows.ok_or_else(|| { + ErrorKind::MissingOpConfig("select operators require a window mapping".into()) + })?; let windows: Result> = if let tremor_script::ast::Stmt::Select(s) = node.stmt.suffix() { s.stmt @@ -762,13 +744,9 @@ fn operator( config: &NodeConfig, node: Option, ) -> Result> { - let node = if let Some(node) = node { - node - } else { - return Err( - ErrorKind::MissingOpConfig("trickle operators require a statement".into()).into(), - ); - }; + let node = node.ok_or_else(|| { + ErrorKind::MissingOpConfig("trickle operators require a statement".into()) + })?; Ok(Box::new(TrickleOperator::with_stmt( operator_uid, config.id.clone().to_string(), @@ -781,13 +759,9 @@ fn script( defn: Option, node: Option, ) -> Result> { - let node = if let Some(node) = node { - node - } else { - return Err( - ErrorKind::MissingOpConfig("trickle operators require a statement".into()).into(), - ); - }; + let node = node.ok_or_else(|| { + ErrorKind::MissingOpConfig("trickle operators require a statement".into()) + })?; Ok(Box::new(Trickle::with_stmt( config.id.clone().to_string(), defn.ok_or_else(|| Error::from("Script definition missing"))?, diff --git a/tremor-script/src/errors.rs b/tremor-script/src/errors.rs index 535d193acd..7bbd270b4d 100644 --- a/tremor-script/src/errors.rs +++ b/tremor-script/src/errors.rs @@ -942,8 +942,18 @@ pub(crate) fn error_need_obj( got: ValueType, meta: &NodeMetas, ) -> Result { - error_type_conflict_mult(outer, inner, got, vec![ValueType::Object], meta) + Err(error_need_obj_err(outer, inner, got, meta)) } + +pub(crate) fn error_need_obj_err( + outer: &O, + inner: &I, + got: ValueType, + meta: &NodeMetas, +) -> Error { + error_type_conflict_mult_err(outer, inner, got, vec![ValueType::Object], meta) +} + pub(crate) fn error_need_arr( outer: &O, inner: &I, @@ -1190,8 +1200,19 @@ pub(crate) fn error_bad_key<'script, T, O: BaseExpr, I: BaseExpr>( options: Vec, meta: &NodeMetas, ) -> Result { + Err(error_bad_key_err(outer, inner, path, key, options, meta)) +} + +pub(crate) fn error_bad_key_err<'script, O: BaseExpr, I: BaseExpr>( + outer: &O, + inner: &I, + path: &ast::Path<'script>, + key: String, + options: Vec, + meta: &NodeMetas, +) -> Error { let expr: Range = outer.extent(meta); - Err(match path { + match path { ast::Path::Reserved(_) | ast::Path::Local(_) | ast::Path::Const(_) => { ErrorKind::BadAccessInLocal(expr, inner.extent(meta), key, options).into() } @@ -1204,5 +1225,5 @@ pub(crate) fn error_bad_key<'script, T, O: BaseExpr, I: BaseExpr>( ast::Path::State(_p) => { ErrorKind::BadAccessInState(expr, inner.extent(meta), key, options).into() } - }) + } } diff --git a/tremor-script/src/grok.rs b/tremor-script/src/grok.rs index 853fd5a097..98cce2e50a 100644 --- a/tremor-script/src/grok.rs +++ b/tremor-script/src/grok.rs @@ -149,17 +149,12 @@ mod tests { let codec = Pattern::new(&pat).expect("bad pattern"); let decoded = codec.matches(raw.as_bytes()); dbg!(&decoded); - match decoded { - Ok(j) => { + decoded + .map(|j| { assert_eq!(j, json); true - } - - Err(x) => { - eprintln!("{}", x); - false - } - } + }) + .unwrap_or_default() } #[test] @@ -181,14 +176,11 @@ mod tests { fn no_match() { let codec = Pattern::new(&"{}").expect("bad pattern"); let decoded = codec.matches(b"cookie monster"); - if let Err(decoded) = decoded { - assert_eq!( - "No match for log text: cookie monster", - decoded.description() - ) - } else { - eprintln!("Expected no match") - }; + let decoded = decoded.err().expect("Expected no match"); + assert_eq!( + "No match for log text: cookie monster", + decoded.description() + ) } #[test] diff --git a/tremor-script/src/interpreter/expr.rs b/tremor-script/src/interpreter/expr.rs index a5c3561d49..1a933fc69c 100644 --- a/tremor-script/src/interpreter/expr.rs +++ b/tremor-script/src/interpreter/expr.rs @@ -16,16 +16,19 @@ use super::{ merge_values, patch_value, resolve, resolve_value, set_local_shadow, test_guard, test_predicate_expr, Env, ExecOpts, LocalStack, NULL, }; -use crate::ast::{ - BaseExpr, ClauseGroup, ClausePreCondition, Comprehension, DefaultCase, EmitExpr, EventPath, - Expr, IfElse, ImutExprInt, Match, Merge, Patch, Path, Segment, -}; use crate::errors::{ - error_assign_array, error_assign_to_const, error_bad_key, error_invalid_assign_target, - error_need_obj, error_no_clause_hit, error_oops, Result, + error_assign_array, error_assign_to_const, error_bad_key_err, error_invalid_assign_target, + error_need_obj, error_need_obj_err, error_no_clause_hit, Result, }; use crate::prelude::*; use crate::registry::RECUR_PTR; +use crate::{ + ast::{ + BaseExpr, ClauseGroup, ClausePreCondition, Comprehension, DefaultCase, EmitExpr, EventPath, + Expr, IfElse, ImutExprInt, Match, Merge, Patch, Path, Segment, + }, + errors::error_oops_err, +}; use crate::{stry, Value}; use matches::matches; use std::mem; @@ -448,13 +451,16 @@ where } Path::Local(lpath) => { - if let Some(l) = stry!(local.get(lpath.idx, self, lpath.mid(), &env.meta)) { - let l: &mut Value<'event> = unsafe { mem::transmute(l) }; - l - } else { - let key = env.meta.name_dflt(lpath.mid).to_string(); - return error_bad_key(self, lpath, &path, key, vec![], &env.meta); - } + stry!(local + .get(lpath.idx, self, lpath.mid(), &env.meta) + .and_then(|o| { + o.as_ref() + .map(|l| -> &mut Value<'event> { unsafe { mem::transmute(l) } }) + .ok_or_else(|| { + let key = env.meta.name_dflt(lpath.mid).to_string(); + error_bad_key_err(self, lpath, &path, key, vec![], &env.meta) + }) + })) } Path::Meta(_path) => meta, Path::Event(_path) => event, @@ -470,28 +476,34 @@ where for segment in segments { match segment { Segment::Id { key, .. } => { - current = if let Ok(next) = key.lookup_or_insert_mut( - unsafe { mem::transmute::<&Value, &mut Value>(current) }, - || Value::object_with_capacity(halfbrown::VEC_LIMIT_UPPER), - ) { - next - } else { - return error_need_obj(self, segment, current.value_type(), &env.meta); - }; + current = stry!(key + .lookup_or_insert_mut( + unsafe { mem::transmute::<&Value, &mut Value>(current) }, + || Value::object_with_capacity(halfbrown::VEC_LIMIT_UPPER), + ) + .map_err(|_| error_need_obj_err( + self, + segment, + current.value_type(), + &env.meta + ))); } Segment::Element { expr, .. } => { let id = stry!(expr.eval_to_string(opts, env, event, state, meta, local)); let v: &mut Value<'event> = unsafe { mem::transmute(current) }; - if let Some(map) = v.as_object_mut() { - current = match map.get_mut(&id) { - Some(v) => v, - None => map - .entry(id) - .or_insert_with(|| Value::object_with_capacity(32)), - } - } else { - return error_need_obj(self, segment, current.value_type(), &env.meta); - } + let map = stry!(v.as_object_mut().ok_or_else(|| error_need_obj_err( + self, + segment, + current.value_type(), + &env.meta + ))); + + current = match map.get_mut(&id) { + Some(v) => v, + None => map + .entry(id) + .or_insert_with(|| Value::object_with_capacity(32)), + }; } Segment::Idx { .. } | Segment::Range { .. } => { return error_assign_array(self, segment, &env.meta) @@ -583,31 +595,27 @@ where expr: ImutExprInt::Path(Path::Event(EventPath { segments, .. })), port, .. - } if segments.is_empty() => { - let port = if let Some(port) = port { - Some( - stry!(port.eval_to_string(opts, env, event, state, meta, local)) - .to_string(), - ) - } else { - None - }; - Ok(Cont::EmitEvent(port)) - } - expr => { - let port = if let Some(port) = &expr.port { - Some( - stry!(port.eval_to_string(opts, env, event, state, meta, local)) - .to_string(), - ) - } else { - None - }; - Ok(Cont::Emit( - stry!(expr.expr.run(opts, env, event, state, meta, local)).into_owned(), - port, - )) - } + } if segments.is_empty() => port + .as_ref() + .map(|port| { + port.eval_to_string(opts, env, event, state, meta, local) + .map(|s| s.to_string()) + }) + .transpose() + .map(Cont::EmitEvent), + expr => expr + .port + .as_ref() + .map(|port| { + port.eval_to_string(opts, env, event, state, meta, local) + .map(|p| p.to_string()) + }) + .transpose() + .and_then(|port| { + expr.expr + .run(opts, env, event, state, meta, local) + .map(|v| Cont::Emit(v.into_owned(), port)) + }), }, Expr::Drop { .. } => Ok(Cont::Drop), Expr::AssignMoveLocal { idx, path, .. } => { @@ -615,27 +623,27 @@ where // this local variable again, it allows os to // move the variable instead of cloning it - let value = if let Some(v) = local.values.get_mut(*idx) { - let mut opt: Option = None; - std::mem::swap(v, &mut opt); - if let Some(v) = opt { - v - } else { - return error_oops( - self, - 0xdead_000c, - "Unknown local variable in Expr::AssignMoveLocal", - &env.meta, - ); - } - } else { - return error_oops( + let value = stry!(local + .values + .get_mut(*idx) + .ok_or_else(|| error_oops_err( self, 0xdead_000b, "Unknown local variable in Expr::AssignMoveLocal", &env.meta, - ); - }; + )) + .and_then(|v| { + let mut opt: Option = None; + std::mem::swap(v, &mut opt); + opt.ok_or_else(|| { + error_oops_err( + self, + 0xdead_000c, + "Unknown local variable in Expr::AssignMoveLocal", + &env.meta, + ) + }) + })); self.assign(opts, env, event, state, meta, local, &path, value) .map(Cont::Cont) } diff --git a/tremor-script/src/registry.rs b/tremor-script/src/registry.rs index faa86ac01b..88fa92ada9 100644 --- a/tremor-script/src/registry.rs +++ b/tremor-script/src/registry.rs @@ -294,11 +294,13 @@ impl TremorFnWrapper { /// Check if a given arity is valit for the function #[must_use] + #[cfg(not(tarpaulin_include))] // just a passthrough pub fn valid_arity(&self, n: usize) -> bool { self.fun.valid_arity(n) } /// Returns the functions arity #[must_use] + #[cfg(not(tarpaulin_include))] // just a passthrough pub fn arity(&self) -> RangeInclusive { self.fun.arity() } @@ -708,6 +710,7 @@ impl TremorAggrFnWrapper { /// /// # Errors /// if compensating the function fails + #[cfg(not(tarpaulin_include))] // just a passthrough pub fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { self.fun.compensate(args) } diff --git a/tremor-script/src/std_lib/string.rs b/tremor-script/src/std_lib/string.rs index aac5ae4330..7efa684f73 100644 --- a/tremor-script/src/std_lib/string.rs +++ b/tremor-script/src/std_lib/string.rs @@ -146,11 +146,7 @@ pub fn load(registry: &mut Registry) { }) })) .insert(tremor_const_fn!(string|substr(_context, _input, _start, _end) { - let (input, start, end) = if let (Some(input), Some(start), Some(end)) = (_input.as_str(), _start.as_usize(), _end.as_usize()) { - (input, start, end) - } else { - return Err(FunctionError::BadType{mfa: this_mfa()}) - }; + let ((input, start), end) = _input.as_str().zip(_start.as_usize()).zip(_end.as_usize()).ok_or_else(||FunctionError::BadType{mfa: this_mfa()})?; // Since rust doesn't handle UTF8 indexes we have to translate this // _input.char_indices() - get an iterator over codepoint indexes // .nth(*_start as usize) - try to get the nth character as a byte index - returns an option of a two tuple From 30c6f4aa5586270cce9ddd29deb8a7cbe3dd8bf7 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Wed, 12 May 2021 12:05:49 +0200 Subject: [PATCH 3/8] Add coverage for match clauses Signed-off-by: Heinz N. Gies --- tremor-script/src/ast/eq.rs | 96 ++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 7 deletions(-) diff --git a/tremor-script/src/ast/eq.rs b/tremor-script/src/ast/eq.rs index 923380a261..89603894a1 100644 --- a/tremor-script/src/ast/eq.rs +++ b/tremor-script/src/ast/eq.rs @@ -644,7 +644,10 @@ impl AstEq for TremorFnWrapper { #[cfg(test)] mod tests { - use crate::ast::Expr; + + use std::collections::BTreeMap; + + use crate::ast::{ClauseGroup, Expr, ImutExpr}; use crate::errors::Result; use crate::path::ModulePath; use crate::registry::registry; @@ -882,6 +885,14 @@ mod tests { case %[] => "empty_array" case %[ %{ present x } ] => "complex" case %{ absent y, snot == "badger", superhero ~= %{absent name}} when event.superhero.is_snotty => "snotty_badger" + case %{ absent y } => "blarg" + case %{ absent x } => "blarg" + case %{ snot == "badger" } => "argh" + case %{ snot == "meh" } => "argh" + case %{ snot == "muh" } => "argh" + case %{ snot == 1 } => "argh" + case %{ snot == 2 } => "argh" + case %{ snot == 3 } => "argh" default => null end); (match x of @@ -891,6 +902,12 @@ mod tests { case %[] => "empty_array" case %[ %{ present x } ] => "complex" case %{ absent y, snot == "badger", superhero ~= %{absent name}} when event.superhero.is_snotty => "snotty_badger" + case %{ snot == "badger" } => "argh" + case %{ snot == "meh" } => "argh" + case %{ snot == "muh" } => "argh" + case %{ snot == 1 } => "argh" + case %{ snot == 2 } => "argh" + case %{ snot == 3 } => "argh" default => "not_null" end) "# @@ -997,13 +1014,79 @@ mod tests { "# ); - #[test] - fn recur_eq_test() -> Result<()> { - let imut_expr = crate::ast::ImutExpr(ImutExprInt::Path(Path::Event(EventPath { + fn imut_expr() -> ImutExpr<'static> { + ImutExpr(ImutExprInt::Path(Path::Event(EventPath { mid: 1, segments: vec![], - }))); - let e: crate::ast::ImutExprs = vec![imut_expr]; + }))) + } + #[test] + fn clause_group() { + let pc = PredicateClause { + mid: 0, + pattern: Pattern::Default, + guard: None, + exprs: vec![], + last_expr: imut_expr(), + }; + assert!(ClauseGroup::Single { + precondition: None, + pattern: pc.clone(), + } + .ast_eq(&ClauseGroup::Single { + precondition: None, + pattern: pc.clone(), + })); + assert!(ClauseGroup::Simple { + precondition: None, + patterns: vec![pc.clone()], + } + .ast_eq(&ClauseGroup::Simple { + precondition: None, + patterns: vec![pc.clone()], + })); + assert!(ClauseGroup::SearchTree { + precondition: None, + tree: BTreeMap::new(), + rest: vec![pc.clone()], + } + .ast_eq(&ClauseGroup::SearchTree { + precondition: None, + tree: BTreeMap::new(), + rest: vec![pc.clone()], + })); + assert!(ClauseGroup::Combined { + precondition: None, + groups: vec![ClauseGroup::Single { + precondition: None, + pattern: pc.clone(), + }], + } + .ast_eq(&ClauseGroup::Combined { + precondition: None, + groups: vec![ClauseGroup::Single { + precondition: None, + pattern: pc.clone(), + }], + })); + } + + #[test] + fn default_case() { + assert!(DefaultCase::::None.ast_eq(&DefaultCase::None)); + assert!(DefaultCase::One(imut_expr()).ast_eq(&DefaultCase::One(imut_expr()))); + assert!(DefaultCase::Many { + exprs: vec![], + last_expr: Box::new(imut_expr()) + } + .ast_eq(&DefaultCase::Many { + exprs: vec![], + last_expr: Box::new(imut_expr()) + })); + } + #[test] + fn recur_eq_test() { + let e: crate::ast::ImutExprs = vec![imut_expr()]; let recur1 = Recur { mid: 1, argc: 2, @@ -1019,7 +1102,6 @@ mod tests { assert!(recur1.ast_eq(&recur2)); recur2.open = false; assert!(!recur1.ast_eq(&recur2)); - Ok(()) } #[test] From 48b0f9de402ce4fedaffd947894d809f7e233195 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Wed, 12 May 2021 13:52:21 +0200 Subject: [PATCH 4/8] cleanup preprocess Signed-off-by: Heinz N. Gies --- .../lexer_triple_colon/error.txt | 2 +- tremor-script/src/grammar.lalrpop | 10 +- tremor-script/src/lexer.rs | 431 ++++++------------ tremor-script/src/pos.rs | 5 + 4 files changed, 141 insertions(+), 307 deletions(-) diff --git a/tests/script_errors/lexer_triple_colon/error.txt b/tests/script_errors/lexer_triple_colon/error.txt index efa70be89c..a245786eda 100644 --- a/tests/script_errors/lexer_triple_colon/error.txt +++ b/tests/script_errors/lexer_triple_colon/error.txt @@ -1,5 +1,5 @@ Error: 1 | # we can't have triple colons 2 | a:::b() - | ^^^ Found the token `:::` but expected one of `!=`, `%`, `&`, `(`, `)`, `*`, `+`, `,`, `-`, `.`, `.`, `/`, `:`, `::`, `;`, `<`, `<<`, `<=`, ``, `==`, `=>`, `>`, `>=`, `>>`, `>>>`, `[`, `]`, `^`, `and`, `case`, `default`, `end`, `of`, `or`, `when`, `xor`, `|`, `}` + | ^^^ Found the token `:::` but expected one of `!=`, `%`, `&`, `(`, `)`, `*`, `+`, `,`, `-`, `.`, `.`, `/`, `:`, `::`, `;`, `<`, `<<`, `<=`, ``, `==`, `=>`, `>`, `>=`, `>>`, `>>>`, `[`, `]`, `^`, `and`, `case`, `default`, `end`, `of`, `or`, `when`, `xor`, `}` | NOTE: Did you mean to use `::`? \ No newline at end of file diff --git a/tremor-script/src/grammar.lalrpop b/tremor-script/src/grammar.lalrpop index a4a2c6853c..2c01edc977 100644 --- a/tremor-script/src/grammar.lalrpop +++ b/tremor-script/src/grammar.lalrpop @@ -380,7 +380,7 @@ AndExprImut: ImutExprRaw<'input> = { /// Bitwise or BitOrExprImut: ImutExprRaw<'input> = { - > => <>, +// > => <>, BitXorExprImut => <>, } @@ -1243,9 +1243,9 @@ BinXor: BinOpKind = { BinAnd: BinOpKind = { "and" => BinOpKind::And, } -BinBitOr: BinOpKind = { - "|" => BinOpKind::BitOr, -} +// BinBitOr: BinOpKind = { +// "|" => BinOpKind::BitOr, +// } BinBitXor: BinOpKind = { "^" => BinOpKind::BitXor, } @@ -1343,7 +1343,7 @@ extern { ">>>" => Token::RBitShiftUnsigned, "<<" => Token::LBitShift, "!" => Token::BitNot, - "|" => Token::BitOr, +// "|" => Token::BitOr, "^" => Token::BitXor, "&" => Token::BitAnd, "+" => Token::Add, diff --git a/tremor-script/src/lexer.rs b/tremor-script/src/lexer.rs index 895e54592b..9f66181c4f 100644 --- a/tremor-script/src/lexer.rs +++ b/tremor-script/src/lexer.rs @@ -117,7 +117,7 @@ pub(crate) fn ident_to_token(ident: &str) -> Token { "from" => Token::From, "where" => Token::Where, "with" => Token::With, - "order" => Token::Order, + // "order" => Token::Order, "group" => Token::Group, "by" => Token::By, "having" => Token::Having, @@ -269,7 +269,7 @@ pub enum Token<'input> { /// bitwise and `&` BitAnd, /// bitwise or `|` - BitOr, + // BitOr, /// bitwise xor `^` BitXor, /// equal `=` @@ -352,7 +352,7 @@ pub enum Token<'input> { /// The `with` keyword With, /// The `order` keyword - Order, + // Order, /// the `group` keyword Group, /// The `by` keyword @@ -394,16 +394,6 @@ pub enum Token<'input> { } impl<'input> Token<'input> { - /// a prettified version of the token - #[must_use] - pub fn prettify(&self) -> String { - if self.is_keyword() || self.is_symbol() || self.is_ignorable() { - format!("{}", self) - } else { - std::any::type_name::().to_string() - } - } - /// Is the token ignorable except when syntax or error highlighting. /// Is the token insignificant when parsing ( a correct ... ) source. #[cfg(not(tarpaulin_include))] // matches is not supported @@ -452,7 +442,7 @@ impl<'input> Token<'input> { | Token::Move | Token::Of | Token::Operator - | Token::Order + // | Token::Order | Token::Patch | Token::Present | Token::Script @@ -536,7 +526,7 @@ impl<'input> Token<'input> { | Token::Or | Token::Xor | Token::And - | Token::BitOr + // | Token::BitOr | Token::BitXor | Token::BitAnd | Token::Eq @@ -598,11 +588,7 @@ impl<'input> fmt::Display for Token<'input> { let value: &str = &value; let s = Value::from(value).encode(); // Strip the quotes - if let Some(s) = s.get(1..s.len() - 1) { - write!(f, "{}", s) - } else { - Ok(()) - } + write!(f, "{}", s.get(1..s.len() - 1).unwrap_or_default()) } Token::HereDocStart => { // here we write the following linebreak @@ -705,7 +691,7 @@ impl<'input> fmt::Display for Token<'input> { Token::Or => write!(f, "or"), Token::Xor => write!(f, "xor"), Token::BitAnd => write!(f, "&"), - Token::BitOr => write!(f, "|"), + // Token::BitOr => write!(f, "|"), Token::BitXor => write!(f, "^"), Token::Eq => write!(f, "="), Token::EqEq => write!(f, "=="), @@ -730,7 +716,7 @@ impl<'input> fmt::Display for Token<'input> { Token::From => write!(f, "from"), Token::Where => write!(f, "where"), Token::With => write!(f, "with"), - Token::Order => write!(f, "order"), + // Token::Order => write!(f, "order"), Token::Group => write!(f, "group"), Token::By => write!(f, "by"), Token::Having => write!(f, "having"), @@ -859,10 +845,10 @@ pub struct Preprocessor {} macro_rules! take_while { ($let:ident, $token:pat, $iter:expr) => { - $let = $iter.next(); + $let = $iter.next().ok_or(ErrorKind::UnexpectedEndOfStream)??; loop { - if let Some(Ok(Spanned { value: $token, .. })) = $let { - $let = $iter.next(); + if let Spanned { value: $token, .. } = $let { + $let = $iter.next().ok_or(ErrorKind::UnexpectedEndOfStream)??; continue; } break; @@ -884,11 +870,7 @@ impl CompilationUnit { file_path: p.into_boxed_path(), } } - /// String representation of the computational unit - #[must_use] - pub fn to_str(&self) -> Option<&str> { - self.file_path.to_str() - } + /// Returns the path of the file for this #[must_use] pub fn file_path(&self) -> &Path { @@ -939,16 +921,13 @@ impl IncludeStack { pub fn push + ?Sized>(&mut self, file: &S) -> Result { let e = CompilationUnit::from_file(Path::new(file)); if self.contains(&e) { - Err(format!( - "Cyclic dependency detected: {} -> {}", - self.elements - .iter() - .map(ToString::to_string) - .collect::>() - .join(" -> "), - e.to_string() - ) - .into()) + let es = self + .elements + .iter() + .map(ToString::to_string) + .collect::>() + .join(" -> "); + Err(format!("Cyclic dependency detected: {} -> {}", es, e.to_string()).into()) } else { let cu = self.cus.len(); self.cus.push(e.clone()); @@ -961,6 +940,23 @@ impl IncludeStack { } } +fn urt(next: Spanned>, alt: &[&'static str]) -> Error { + ErrorKind::UnrecognizedToken( + Range::from(next.span).expand_lines(2), + Range::from(next.span), + next.value.to_string(), + alt.iter().map(ToString::to_string).collect(), + ) + .into() +} + +fn as_ident(next: Spanned>) -> Result> { + if let Token::Ident(id, _) = next.value { + Ok(id) + } else { + Err(urt(next, &["``"])) + } +} impl<'input> Preprocessor { pub(crate) fn resolve( module_path: &ModulePath, @@ -987,13 +983,10 @@ impl<'input> Preprocessor { } } - Err(ErrorKind::ModuleNotFound( - Range::from((use_span.start, use_span.end)).expand_lines(2), - Range::from((span2.start, span2.end)), - rel_module_path.to_string_lossy().to_string(), - module_path.mounts.clone(), - ) - .into()) + let outer = Range::from((use_span.start, use_span.end)).expand_lines(2); + let inner = Range::from((span2.start, span2.end)); + let path = rel_module_path.to_string_lossy().to_string(); + Err(ErrorKind::ModuleNotFound(outer, inner, path, module_path.mounts.clone()).into()) } #[allow(clippy::too_many_lines)] @@ -1018,298 +1011,134 @@ impl<'input> Preprocessor { let mut next; loop { - next = iter.next(); + next = iter.next().ok_or(ErrorKind::UnexpectedEndOfStream)??; match next { // // use [as ] ; // - Some(Ok(Spanned { + Spanned { value: Token::Use, span: use_span, .. - })) => { + } => { let mut rel_module_path = PathBuf::new(); - let mut alias = String::from(""); + let mut alias; take_while!(next, Token::Whitespace(_), iter); - if let Some(Ok(Spanned { - value: Token::Ident(ref id, _), - .. - })) = &next - { - let id_str: &str = &id; - rel_module_path.push(id_str); - alias = id.to_string(); + let id = as_ident(next)?; + alias = id.to_string(); + rel_module_path.push(&alias); - take_while!(next, Token::Whitespace(_), iter); + take_while!(next, Token::Whitespace(_), iter); - loop { - if let Some(Ok(Spanned { - value: Token::ColonColon, - .. - })) = next - { - // module path of the form: - // [ :: ]*; - // - take_while!(next, Token::Whitespace(_), iter); - - if let Some(Ok(Spanned { ref value, .. })) = next { - if let Token::Ident(id, ..) = &value { - rel_module_path.push(&format!("{}", &id)); - alias = id.to_string(); - take_while!(next, Token::Whitespace(_), iter); - - if let Some(Ok(Spanned { - ref value, span, .. - })) = next - { - match value { - Token::As => (), - Token::ColonColon => continue, - Token::Semi => break, - bad_token => { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)) - .expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec![ - "`as`".to_string(), - "`::`".to_string(), - "`;`".to_string(), - ], - ) - .into()); - } - } - } - } else if let Some(Ok(Spanned { - value: Token::As, .. - })) = next - { - if let Some(Ok(Spanned { - value: bad_token, - span, - .. - })) = next - { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["``".to_string()], - ) - .into()); - } - } else if let Some(Ok(Spanned { - value: Token::Semi, .. - })) = next - { - break; - } else { - // We expect either ';' or 'as' - // At this point we have a parse error / bad token - // - match next { - Some(Ok(Spanned { value, span, .. })) => { - let bad_token = value; - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)) - .expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["``".to_string()], - ) - .into()); - } - Some(Err(e)) => { - return Err(e); - } - None => { - return Err(ErrorKind::UnexpectedEndOfStream.into()); - } - } - } - } else if let Some(Ok(Spanned { span, value })) = next { - let bad_token = value; - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["".to_string()], - ) - .into()); - } else if let Some(Err(e)) = next { - // Capture lexical errors from the initial lexing phase and bubble back directly - // - return Err(e); + loop { + if next.value == Token::ColonColon { + // module path of the form: + // [ :: ]*; + // + take_while!(next, Token::Whitespace(_), iter); + + let id = as_ident(next)?; + alias = id.to_string(); + rel_module_path.push(&alias); + take_while!(next, Token::Whitespace(_), iter); + match next.value { + Token::As => (), + Token::ColonColon => continue, + Token::Semi => break, + _ => { + return Err(urt(next, &["`as`", "`::`", "`;`"])); } - } else if let Some(Ok(Spanned { - value: Token::As, .. - })) = next - { - break; - } else if let Some(Ok(Spanned { - value: Token::Semi, .. - })) = next - { - break; - } else if let Some(Ok(Spanned { - value: bad_token, - span, - .. - })) = next - { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["::".to_string(), "as".to_string(), ";".to_string()], - ) - .into()); } + } else if next.value == Token::As || next.value == Token::Semi { + break; + } else { + return Err(urt(next, &["`as`", "`::`", "`;`"])); } + } - // - // As - // + // + // As + // - if let Some(Ok(Spanned { - value: Token::As, .. - })) = next - { - take_while!(next, Token::Whitespace(_), iter); - if let Some(Ok(Spanned { - value: Token::Ident(alias_id, ..), - .. - })) = &next - { - alias = alias_id.to_string(); - } else if let Some(Ok(Spanned { - value: bad_token, - span, - .. - })) = next - { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["``".to_string()], - ) - .into()); - } + if next.value == Token::As { + take_while!(next, Token::Whitespace(_), iter); - // ';' - take_while!(next, Token::Whitespace(_), iter); + alias = as_ident(next)?.to_string(); - // Semi - take_while!(next, Token::Whitespace(_), iter); - } - } else if let Some(Ok(Spanned { - ref value, span, .. - })) = next - { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - value.to_string(), - vec!["``".to_string()], - ) - .into()); + // ';' + take_while!(next, Token::Whitespace(_), iter); + + // Semi + take_while!(next, Token::Whitespace(_), iter); } - if let Some(Ok(Spanned { - value: Token::Semi, .. - })) = next - { + if next.value == Token::Semi { take_while!(next, Token::Whitespace(_), iter); } - if let Some(Ok(Spanned { - value: Token::NewLine, - span: span2, - .. - })) = next - { + if next.value == Token::NewLine { //let file_path = rel_module_path.clone(); let (inner_cu, file_path) = Preprocessor::resolve( module_path, use_span, - span2, + next.span, &rel_module_path, include_stack, )?; let file_path2 = file_path.clone(); - match file::open(&file_path) { - Ok(mut file) => { - let mut s = String::new(); - file.read_to_string(&mut s)?; - - s.push(' '); - let s = Preprocessor::preprocess( - &module_path, - file_path.as_os_str(), - &mut s, - inner_cu, - include_stack, - )?; - let y = s - .into_iter() - .filter_map(|x| x.map(|x| format!("{}", x.value)).ok()) - .collect::>() - .join(""); - input.push_str(&format!( - "#!line 0 0 0 {} {}\n", - inner_cu, - &file_path2.to_string_lossy() - )); - input.push_str(&format!("mod {} with\n", &alias)); - input.push_str(&format!( - "#!line 0 0 0 {} {}\n", - inner_cu, - &file_path2.to_string_lossy() - )); - input.push_str(&format!("{}\n", y.trim())); - input.push_str("end;\n"); - input.push_str(&format!( - "#!line {} {} {} {} {}\n", - span2.end.absolute(), - span2.end.line() + 1, - 0, - cu, - file_name.to_string_lossy(), - )); - } - Err(e) => { - return Err(e.into()); - } - } + let mut file = file::open(&file_path)?; + let mut s = String::new(); + file.read_to_string(&mut s)?; + + s.push(' '); + let s = Preprocessor::preprocess( + &module_path, + file_path.as_os_str(), + &mut s, + inner_cu, + include_stack, + )?; + let y = s + .into_iter() + .filter_map(|x| x.map(|x| format!("{}", x.value)).ok()) + .collect::>() + .join(""); + input.push_str(&format!( + "#!line 0 0 0 {} {}\n", + inner_cu, + &file_path2.to_string_lossy() + )); + input.push_str(&format!("mod {} with\n", &alias)); + input.push_str(&format!( + "#!line 0 0 0 {} {}\n", + inner_cu, + &file_path2.to_string_lossy() + )); + input.push_str(&format!("{}\n", y.trim())); + input.push_str("end;\n"); + input.push_str(&format!( + "#!line {} {} {} {} {}\n", + next.span.end.absolute(), + next.span.end.line() + 1, + 0, + cu, + file_name.to_string_lossy(), + )); + include_stack.pop(); - } else if let Some(Ok(Spanned { - value: bad_token, - span, - .. - })) = next - { - return Err(ErrorKind::UnrecognizedToken( - Range::from((span.start, span.end)).expand_lines(2), - Range::from((span.start, span.end)), - bad_token.to_string(), - vec!["".to_string()], - ) - .into()); + } else { + return Err(urt(next, &["``"])); } } - Some(Ok(other)) => { + Spanned { + value: Token::EndOfStream, + .. + } => break, + other => { input.push_str(&format!("{}", other.value)); } - Some(Err(e)) => { - return Err(e); - } - None => break, } } //input.push_str(" "); diff --git a/tremor-script/src/pos.rs b/tremor-script/src/pos.rs index 5d2acb48fb..569180b93c 100644 --- a/tremor-script/src/pos.rs +++ b/tremor-script/src/pos.rs @@ -120,6 +120,11 @@ impl Range { self.0.unit_id } } +impl From for Range { + fn from(s: Span) -> Self { + Range::from((s.start, s.end)) + } +} impl From<(Location, Location)> for Range { fn from(locs: (Location, Location)) -> Self { From 1b7ed38e9d978e0b0bc1d6645a53ee6799c0aa29 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Wed, 12 May 2021 15:58:48 +0200 Subject: [PATCH 5/8] Obey Clippy Signed-off-by: Heinz N. Gies --- tremor-script/src/lexer.rs | 30 ++++++++++++++++++------------ tremor-script/src/pos.rs | 7 ++++--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/tremor-script/src/lexer.rs b/tremor-script/src/lexer.rs index 9f66181c4f..ba4456f309 100644 --- a/tremor-script/src/lexer.rs +++ b/tremor-script/src/lexer.rs @@ -51,7 +51,7 @@ impl ParserSource for str { } /// A token with a span to indicate its location -pub type TokenSpan<'input> = Spanned>; +pub type TokenSpan<'input> = Spanned<'input>; fn is_ws(ch: char) -> bool { ch.is_whitespace() && ch != '\n' @@ -393,6 +393,12 @@ pub enum Token<'input> { ConfigDirective, } +impl<'input> Default for Token<'input> { + fn default() -> Self { + Self::EndOfStream + } +} + impl<'input> Token<'input> { /// Is the token ignorable except when syntax or error highlighting. /// Is the token insignificant when parsing ( a correct ... ) source. @@ -553,7 +559,7 @@ impl<'input> Token<'input> { } // LALRPOP requires a means to convert spanned tokens to triple form -impl<'input> __ToTriple<'input> for Spanned> { +impl<'input> __ToTriple<'input> for Spanned<'input> { fn to_triple( value: Self, ) -> std::result::Result< @@ -810,7 +816,7 @@ impl<'input> Tokenizer<'input> { /// The purpose of this method is to not accidentally consume tokens after an error, /// which might be completely incorrect, but would still show up in the resulting iterator if /// used with `filter_map(Result::ok)` for example. - pub fn tokenize_until_err(self) -> impl Iterator>> { + pub fn tokenize_until_err(self) -> impl Iterator> { // i wanna use map_while here, but it is still unstable :( self.scan((), |_, item| item.ok()).fuse() } @@ -940,7 +946,7 @@ impl IncludeStack { } } -fn urt(next: Spanned>, alt: &[&'static str]) -> Error { +fn urt(next: &Spanned<'_>, alt: &[&'static str]) -> Error { ErrorKind::UnrecognizedToken( Range::from(next.span).expand_lines(2), Range::from(next.span), @@ -950,11 +956,11 @@ fn urt(next: Spanned>, alt: &[&'static str]) -> Error { .into() } -fn as_ident(next: Spanned>) -> Result> { +fn as_ident(next: Spanned<'_>) -> Result> { if let Token::Ident(id, _) = next.value { Ok(id) } else { - Err(urt(next, &["``"])) + Err(urt(&next, &["``"])) } } impl<'input> Preprocessor { @@ -1047,13 +1053,13 @@ impl<'input> Preprocessor { Token::ColonColon => continue, Token::Semi => break, _ => { - return Err(urt(next, &["`as`", "`::`", "`;`"])); + return Err(urt(&next, &["`as`", "`::`", "`;`"])); } } } else if next.value == Token::As || next.value == Token::Semi { break; } else { - return Err(urt(next, &["`as`", "`::`", "`;`"])); + return Err(urt(&next, &["`as`", "`::`", "`;`"])); } } @@ -1129,7 +1135,7 @@ impl<'input> Preprocessor { include_stack.pop(); } else { - return Err(urt(next, &["``"])); + return Err(urt(&next, &["``"])); } } Spanned { @@ -1162,12 +1168,12 @@ pub struct Lexer<'input> { type Lexeme = Option<(Location, char)>; impl<'input> Lexer<'input> { - pub(crate) fn spanned2( + pub(crate) fn spanned2( &self, mut start: Location, mut end: Location, - value: T, - ) -> Spanned { + value: Token<'input>, + ) -> Spanned<'input> { start.unit_id = self.cu; end.unit_id = self.cu; Spanned { diff --git a/tremor-script/src/pos.rs b/tremor-script/src/pos.rs index 569180b93c..fb3f92493e 100644 --- a/tremor-script/src/pos.rs +++ b/tremor-script/src/pos.rs @@ -15,6 +15,7 @@ // Copyright of original code is with original authors. Source cited below: // [libsyntax_pos]: https://github.com/rust-lang/rust/blob/master/src/libsyntax_pos/lib.rs +use super::lexer::Token; pub use codespan::{ ByteIndex as BytePos, ByteOffset, ColumnIndex as Column, ColumnOffset, LineIndex as Line, LineOffset, @@ -94,12 +95,12 @@ pub(crate) fn span(start: Location, end: Location, pp_start: Location, pp_end: L } /// A Spanned element, position plus element -#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)] -pub struct Spanned { +#[derive(Clone, Debug, PartialEq, Default)] +pub struct Spanned<'tkn> { /// The span pub span: Span, /// The token - pub value: T, + pub value: Token<'tkn>, } /// A range in a file between two locations From c792eb962e679885b5c7f76effe7893b5a049987 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Wed, 12 May 2021 17:47:46 +0200 Subject: [PATCH 6/8] more cleanup in lexer Signed-off-by: Heinz N. Gies --- tremor-cli/src/debug.rs | 2 +- tremor-script/src/lexer.rs | 997 ++++++++++++++++++------------------- 2 files changed, 478 insertions(+), 521 deletions(-) diff --git a/tremor-cli/src/debug.rs b/tremor-cli/src/debug.rs index ef9ac908a2..02133078e6 100644 --- a/tremor-cli/src/debug.rs +++ b/tremor-cli/src/debug.rs @@ -69,7 +69,7 @@ where Ok(()) } -fn dbg_tokens(h: &mut W, lexemes: Vec>) -> Result<()> +fn dbg_tokens(h: &mut W, lexemes: Vec) -> Result<()> where W: Highlighter, { diff --git a/tremor-script/src/lexer.rs b/tremor-script/src/lexer.rs index ba4456f309..4844eec7e0 100644 --- a/tremor-script/src/lexer.rs +++ b/tremor-script/src/lexer.rs @@ -1417,45 +1417,42 @@ impl<'input> Lexer<'input> { /// handle pattern begin fn pb(&mut self, start: Location) -> Result> { - match self.lookahead() { - Some((end, '[')) => { + match self.lookahead().ok_or(ErrorKind::UnexpectedEndOfStream)? { + (end, '[') => { self.bump(); Ok(self.spanned2(start, end + '[', Token::LPatBracket)) } - Some((end, '(')) => { + (end, '(') => { self.bump(); Ok(self.spanned2(start, end + '(', Token::LPatParen)) } - Some((end, '{')) => { + (end, '{') => { self.bump(); Ok(self.spanned2(start, end + '{', Token::LPatBrace)) } - Some((end, _)) => Ok(self.spanned2(start, end, Token::Mod)), - None => Err(ErrorKind::UnexpectedEndOfStream.into()), + (end, _) => Ok(self.spanned2(start, end, Token::Mod)), } } /// handle pattern end fn pe(&mut self, start: Location) -> Result> { - match self.lookahead() { - Some((end, '=')) => { + match self.lookahead().ok_or(ErrorKind::UnexpectedEndOfStream)? { + (end, '=') => { self.bump(); Ok(self.spanned2(start, end + '=', Token::NotEq)) } - Some((end, _ch)) => Ok(self.spanned2(start, end, Token::BitNot)), - None => Err(ErrorKind::UnexpectedEndOfStream.into()), + (end, _ch) => Ok(self.spanned2(start, end, Token::BitNot)), } } /// handle tilde fn tl(&mut self, start: Location) -> Result> { - match self.lookahead() { - Some((end, '=')) => { + match self.lookahead().ok_or(ErrorKind::UnexpectedEndOfStream)? { + (end, '=') => { self.bump(); Ok(self.spanned2(start, end + '=', Token::TildeEq)) } - Some((end, _)) => Ok(self.spanned2(start, end, Token::Tilde)), - None => Err(ErrorKind::UnexpectedEndOfStream.into()), + (end, _) => Ok(self.spanned2(start, end, Token::Tilde)), } } @@ -1468,10 +1465,8 @@ impl<'input> Lexer<'input> { } fn next_index(&mut self) -> Result { - match self.chars.next() { - Some((loc, _)) => Ok(loc), - None => Err(ErrorKind::UnexpectedEndOfStream.into()), - } + let (loc, _) = self.chars.next().ok_or(ErrorKind::UnexpectedEndOfStream)?; + Ok(loc) } fn escape_code( @@ -1479,21 +1474,21 @@ impl<'input> Lexer<'input> { string_start: &Location, start: Location, ) -> Result<(Location, char)> { - match self.bump() { - Some((e, '\'')) => Ok((e, '\'')), - Some((e, '"')) => Ok((e, '"')), - Some((e, '\\')) => Ok((e, '\\')), + match self.bump().ok_or(ErrorKind::UnexpectedEndOfStream)? { + (e, '\'') => Ok((e, '\'')), + (e, '"') => Ok((e, '"')), + (e, '\\') => Ok((e, '\\')), // Some((e, '{')) => Ok((e, '{')), // Some((e, '}')) => Ok((e, '}')), - Some((e, '#')) => Ok((e, '#')), - Some((e, '/')) => Ok((e, '/')), - Some((e, 'b')) => Ok((e, '\u{8}')), // Backspace - Some((e, 'f')) => Ok((e, '\u{c}')), // Form Feed - Some((e, 'n')) => Ok((e, '\n')), - Some((e, 'r')) => Ok((e, '\r')), - Some((e, 't')) => Ok((e, '\t')), + (e, '#') => Ok((e, '#')), + (e, '/') => Ok((e, '/')), + (e, 'b') => Ok((e, '\u{8}')), // Backspace + (e, 'f') => Ok((e, '\u{c}')), // Form Feed + (e, 'n') => Ok((e, '\n')), + (e, 'r') => Ok((e, '\r')), + (e, 't') => Ok((e, '\t')), // TODO: Unicode escape codes - Some((mut end, 'u')) => { + (mut end, 'u') => { let mut escape_start = end; escape_start.extend_left('u'); let mut digits = String::with_capacity(4); @@ -1546,7 +1541,7 @@ impl<'input> Lexer<'input> { .into()) } } - Some((mut end, ch)) => { + (mut end, ch) => { let token_str = self .slice_full_lines(string_start, &end) .unwrap_or_default(); @@ -1560,7 +1555,6 @@ impl<'input> Lexer<'input> { ) .into()) } - None => Err(ErrorKind::UnexpectedEndOfStream.into()), } } @@ -1570,8 +1564,15 @@ impl<'input> Lexer<'input> { let mut end = start; loop { - match self.bump() { - Some((mut end, '`')) => { + match self.bump().ok_or_else(|| { + let range = Range::from((start, end)); + ErrorKind::UnterminatedIdentLiteral( + range.expand_lines(2), + range, + UnfinishedToken::new(range, format!("`{}", string)), + ) + })? { + (mut end, '`') => { // we got to bump end by one so we claim the tailing `"` let e = end; let mut s = start; @@ -1585,7 +1586,7 @@ impl<'input> Lexer<'input> { let token = Token::Ident(string.into(), true); return Ok(self.spanned2(start, end, token)); } - Some((end, '\n')) => { + (end, '\n') => { let range = Range::from((start, end)); return Err(ErrorKind::UnterminatedIdentLiteral( range.expand_lines(2), @@ -1595,20 +1596,11 @@ impl<'input> Lexer<'input> { .into()); } - Some((e, other)) => { + (e, other) => { string.push(other as char); end = e; continue; } - None => { - let range = Range::from((start, end)); - return Err(ErrorKind::UnterminatedIdentLiteral( - range.expand_lines(2), - range, - UnfinishedToken::new(range, format!("`{}", string)), - ) - .into()); - } } } } @@ -1623,70 +1615,158 @@ impl<'input> Lexer<'input> { let mut res = vec![q1]; let mut string = String::new(); - match self.lookahead() { + if let (mut end, '"') = self.lookahead().ok_or_else(|| { + let range = Range::from((start, start)); + ErrorKind::UnterminatedStringLiteral( + range.expand_lines(2), + range, + UnfinishedToken::new(range, format!("\"{}", string)), + ) + })? { // This would be the second quote - Some((mut end, '"')) => { + self.bump(); + if let Some((end, '"')) = self.lookahead() { self.bump(); - if let Some((end, '"')) = self.lookahead() { - self.bump(); - // We don't allow anything tailing the initial `"""` - match self.bump() { - Some((mut newline_loc, '\n')) => { - res = vec![self.spanned2(start, end + '"', Token::HereDocStart)]; // (0, vec![]))]; - string = String::new(); - newline_loc.shift('\n'); // content starts after newline - self.hd(heredoc_start, newline_loc, end, false, &string, res) - } - Some((end, ch)) => { - let token_str = self - .slice_until_eol(&start) - .map_or_else(|| format!(r#""""{}"#, ch), ToString::to_string); - let range = Range::from((start, end)); - Err(ErrorKind::TailingHereDoc( - range.expand_lines(2), - range, - UnfinishedToken::new(range, token_str), - ch, - ) - .into()) - } - None => { - let token_str = self - .slice_until_eol(&start) - .map_or_else(|| r#"""""#.to_string(), ToString::to_string); - let range = Range::from((start, end)); - Err(ErrorKind::UnterminatedHereDoc( - range.expand_lines(2), - range, - UnfinishedToken::new(range, token_str), - ) - .into()) - } + // We don't allow anything tailing the initial `"""` + match self.bump().ok_or_else(|| { + let token_str = self + .slice_until_eol(&start) + .map_or_else(|| r#"""""#.to_string(), ToString::to_string); + let range = Range::from((start, end)); + ErrorKind::UnterminatedHereDoc( + range.expand_lines(2), + range, + UnfinishedToken::new(range, token_str), + ) + })? { + (mut newline_loc, '\n') => { + res = vec![self.spanned2(start, end + '"', Token::HereDocStart)]; // (0, vec![]))]; + string = String::new(); + newline_loc.shift('\n'); // content starts after newline + self.hd(heredoc_start, newline_loc, end, false, &string, res) } + (end, ch) => { + let token_str = self + .slice_until_eol(&start) + .map_or_else(|| format!(r#""""{}"#, ch), ToString::to_string); + let range = Range::from((start, end)); + Err(ErrorKind::TailingHereDoc( + range.expand_lines(2), + range, + UnfinishedToken::new(range, token_str), + ch, + ) + .into()) + } + } + } else { + // We had two quotes followed by something not a quote so + // it is an empty string. + //TODO :make slice + let start = end; + end.shift('"'); + res.push(self.spanned2(start, end, Token::DQuote)); + Ok(res) + } + } else { + self.qs(heredoc_start, end, end, false, string, res) + } + } + + #[allow(clippy::too_many_arguments)] + fn intercept_heredoc_error( + &self, + is_hd: bool, + error_prefix: &str, + total_start: Location, + segment_start: &mut Location, + end: &mut Location, + res: &mut Vec>, + content: &mut String, + error: Error, + ) -> ErrorKind { + // intercept error and extend the token to match this outer heredoc + // with interpolation + // otherwise we will not get the whole heredoc in error messages + + let end_location = error.context().1.map_or_else( + || res.last().map_or(*end, |last| last.span.end), + |inner_error_range| inner_error_range.1, + ); + + let Error(kind, ..) = error; + + let token_str = self + .slice_full_lines(&total_start, &end_location) + .unwrap_or_else(|| format!("{}{}", error_prefix, content)); + let mut end = total_start; + end.shift_str(&token_str); + let unfinished_token = + UnfinishedToken::new(Range::from((total_start, end_location)), token_str); + match kind { + ErrorKind::UnterminatedExtractor(outer, location, _) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + ErrorKind::UnterminatedExtractor(outer, location, unfinished_token) + } + ErrorKind::UnterminatedIdentLiteral(outer, location, _) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + ErrorKind::UnterminatedIdentLiteral(outer, location, unfinished_token) + } + ErrorKind::UnterminatedHereDoc(outer, location, _) + | ErrorKind::TailingHereDoc(outer, location, _, _) => { + if is_hd { + // unterminated heredocs within interpolation are better reported + // as unterminated interpolation + ErrorKind::UnterminatedInterpolation( + Range::from((total_start, end.move_down_lines(2))), + Range::from((total_start, end)), + unfinished_token, + ) } else { - // We had two quotes followed by something not a quote so - // it is an empty string. - //TODO :make slice - let start = end; - end.shift('"'); - res.push(self.spanned2(start, end, Token::DQuote)); - Ok(res) + let outer = + outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + + ErrorKind::UnterminatedHereDoc(outer, location, unfinished_token) } } - Some(_) => self.qs(heredoc_start, end, end, false, string, res), - None => { - let range = Range::from((start, start)); - Err(ErrorKind::UnterminatedStringLiteral( - range.expand_lines(2), - range, - UnfinishedToken::new(range, format!("\"{}", string)), - ) - .into()) + ErrorKind::UnterminatedInterpolation(outer, location, _) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + ErrorKind::UnterminatedInterpolation(outer, location, unfinished_token) } + ErrorKind::UnexpectedEscapeCode(outer, location, _, found) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + ErrorKind::UnexpectedEscapeCode(outer, location, unfinished_token, found) + } + ErrorKind::UnterminatedStringLiteral(outer, location, _) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + if is_hd { + ErrorKind::UnterminatedStringLiteral(outer, location, unfinished_token) + } else { + let mut toekn_end = *segment_start; + toekn_end.shift('#'); + toekn_end.shift('{'); + ErrorKind::UnterminatedInterpolation( + outer, + Range::from((*segment_start, toekn_end)), + unfinished_token, + ) + } + } + ErrorKind::InvalidUtf8Sequence(outer, location, _) => { + // expand to start line of heredoc, so we get a proper context + let outer = outer.expand_lines(outer.0.line().saturating_sub(total_start.line())); + ErrorKind::InvalidUtf8Sequence(outer, location, unfinished_token) + } + e => e, } } - #[allow(clippy::too_many_lines, clippy::clippy::too_many_arguments)] + #[allow(clippy::too_many_arguments)] fn handle_interpol( &mut self, is_hd: bool, @@ -1725,160 +1805,69 @@ impl<'input> Lexer<'input> { let mut pcount = 0; let mut first = true; loop { - match self.next() { - Some(Ok(s)) => { - match &s.value { - Token::RBrace if pcount == 0 => { - let start = *segment_start; - *segment_start = s.span.pp_end; - - res.push(s); - if first { - let end_location = *segment_start; - let token_str = self - .slice_full_lines(&total_start, &end_location) - .unwrap_or_else(|| format!("{}{}", error_prefix, content)); - - let unfinished_token = UnfinishedToken::new( - Range::from((start, end_location)), - token_str, - ); - - return Err(ErrorKind::EmptyInterpolation( - Range::from((total_start, end_location)), - Range::from((start, end_location)), - unfinished_token, - ) - .into()); - } - break; - } - Token::RBrace => { - pcount -= 1; - } - Token::LBrace | Token::Interpol => { - pcount += 1; - } - _ => {} - }; - res.push(s); - first = false; - } - // intercept error and extend the token to match this outer heredoc - // with interpolation - // otherwise we will not get the whole heredoc in error messages - Some(Err(error)) => { - let end_location = error.context().1.map_or_else( - || res.last().map_or(*end, |last| last.span.end), - |inner_error_range| inner_error_range.1, - ); + let next = self.next().ok_or_else(|| { + let end_location = self.chars.current(); + let token_str = self + .slice_full_lines(&total_start, end_location) + .unwrap_or_else(|| format!("{}{}", error_prefix, content)); + ErrorKind::UnterminatedInterpolation( + Range::from((total_start, end.move_down_lines(2))), + Range::from((*segment_start, *end)), + UnfinishedToken::new(Range::from((total_start, *end_location)), token_str), + ) + })?; + + let s = next.map_err(|error| { + self.intercept_heredoc_error( + is_hd, + error_prefix, + total_start, + segment_start, + end, + res, + content, + error, + ) + })?; + match &s.value { + Token::RBrace if pcount == 0 => { + let start = *segment_start; + *segment_start = s.span.pp_end; - let Error(kind, ..) = error; + res.push(s); + if first { + let end_location = *segment_start; + let token_str = self + .slice_full_lines(&total_start, &end_location) + .unwrap_or_else(|| format!("{}{}", error_prefix, content)); - let token_str = self - .slice_full_lines(&total_start, &end_location) - .unwrap_or_else(|| format!("{}{}", error_prefix, content)); - let mut end = total_start; - end.shift_str(&token_str); - let unfinished_token = - UnfinishedToken::new(Range::from((total_start, end_location)), token_str); - let error = match kind { - ErrorKind::UnterminatedExtractor(outer, location, _) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - ErrorKind::UnterminatedExtractor(outer, location, unfinished_token) - } - ErrorKind::UnterminatedIdentLiteral(outer, location, _) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - ErrorKind::UnterminatedIdentLiteral(outer, location, unfinished_token) - } - ErrorKind::UnterminatedHereDoc(outer, location, _) - | ErrorKind::TailingHereDoc(outer, location, _, _) => { - if is_hd { - // unterminated heredocs within interpolation are better reported - // as unterminated interpolation - ErrorKind::UnterminatedInterpolation( - Range::from((total_start, end.move_down_lines(2))), - Range::from((total_start, end)), - unfinished_token, - ) - } else { - let outer = outer.expand_lines( - outer.0.line().saturating_sub(total_start.line()), - ); + let unfinished_token = + UnfinishedToken::new(Range::from((start, end_location)), token_str); - ErrorKind::UnterminatedHereDoc(outer, location, unfinished_token) - } - } - ErrorKind::UnterminatedInterpolation(outer, location, _) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - ErrorKind::UnterminatedInterpolation(outer, location, unfinished_token) - } - ErrorKind::UnexpectedEscapeCode(outer, location, _, found) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - ErrorKind::UnexpectedEscapeCode( - outer, - location, - unfinished_token, - found, - ) - } - ErrorKind::UnterminatedStringLiteral(outer, location, _) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - if is_hd { - ErrorKind::UnterminatedStringLiteral( - outer, - location, - unfinished_token, - ) - } else { - let mut toekn_end = *segment_start; - toekn_end.shift('#'); - toekn_end.shift('{'); - ErrorKind::UnterminatedInterpolation( - outer, - Range::from((*segment_start, toekn_end)), - unfinished_token, - ) - } - } - ErrorKind::InvalidUtf8Sequence(outer, location, _) => { - // expand to start line of heredoc, so we get a proper context - let outer = outer - .expand_lines(outer.0.line().saturating_sub(total_start.line())); - ErrorKind::InvalidUtf8Sequence(outer, location, unfinished_token) - } - e => e, - }; - return Err(error.into()); + return Err(ErrorKind::EmptyInterpolation( + Range::from((total_start, end_location)), + Range::from((start, end_location)), + unfinished_token, + ) + .into()); + } + break; } - None => { - let end_location = self.chars.current(); - let token_str = self - .slice_full_lines(&total_start, end_location) - .unwrap_or_else(|| format!("{}{}", error_prefix, content)); - return Err(ErrorKind::UnterminatedInterpolation( - Range::from((total_start, end.move_down_lines(2))), - Range::from((*segment_start, *end)), - UnfinishedToken::new(Range::from((total_start, *end_location)), token_str), - ) - .into()); + Token::RBrace => { + pcount -= 1; } - } + Token::LBrace | Token::Interpol => { + pcount += 1; + } + _ => {} + }; + res.push(s); + first = false; } Ok(()) } - #[allow(clippy::clippy::too_many_arguments)] + #[allow(clippy::too_many_arguments)] fn handle_qs_hd_generic( &mut self, is_hd: bool, @@ -1966,8 +1955,20 @@ impl<'input> Lexer<'input> { // TODO: deduplicate by encapsulating all state in a struct and have some common operations on it let mut content = String::new(); loop { - match self.bump() { - Some((e, '"')) => { + let next = self.bump().ok_or_else(|| { + // We reached EOF + let token_str = self + .slice_until_eof(&heredoc_start) + .map_or_else(|| format!(r#""""\n{}"#, content), ToString::to_string); + let range = Range::from((heredoc_start, end)); + ErrorKind::UnterminatedHereDoc( + range.expand_lines(2), + range, + UnfinishedToken::new(range, token_str), + ) + })?; + match next { + (e, '"') => { // If the current line is just a `"""` then we are at the end of the heredoc res.push(self.spanned2( segment_start, @@ -1993,7 +1994,7 @@ impl<'input> Lexer<'input> { }; end.shift('"'); } - Some((e, '\n')) => { + (e, '\n') => { end = e; content.push('\n'); res.push(self.spanned2( @@ -2005,7 +2006,7 @@ impl<'input> Lexer<'input> { segment_start = end; content = String::new(); } - Some(lc) => { + lc => { self.handle_qs_hd_generic( true, "\"\"\"\n", @@ -2019,24 +2020,10 @@ impl<'input> Lexer<'input> { Token::HereDocLiteral, )?; } - None => { - // We reached EOF - let token_str = self - .slice_until_eof(&heredoc_start) - .map_or_else(|| format!(r#""""\n{}"#, content), ToString::to_string); - let range = Range::from((heredoc_start, end)); - return Err(ErrorKind::UnterminatedHereDoc( - range.expand_lines(2), - range, - UnfinishedToken::new(range, token_str), - ) - .into()); - } } } } - #[allow(clippy::too_many_lines)] /// Handle quote strings `"` ... fn qs( &mut self, @@ -2048,8 +2035,12 @@ impl<'input> Lexer<'input> { mut res: Vec>, ) -> Result>> { loop { - match self.bump() { - Some((mut end, '"')) => { + let next = self + .bump() + .ok_or_else(|| self.unfinished_token("\"", &string, total_start))?; + + match next { + (mut end, '"') => { // If the string is empty we kind of don't need it. if !string.is_empty() { let token = if has_escapes { @@ -2068,7 +2059,7 @@ impl<'input> Lexer<'input> { res.push(self.spanned2(start, end, Token::DQuote)); return Ok(res); } - Some((end, '\n')) => { + (end, '\n') => { let token_str = self .slice_until_eol(&total_start) .map_or_else(|| format!("\"{}", string), ToString::to_string); @@ -2083,7 +2074,7 @@ impl<'input> Lexer<'input> { ) .into()); } - Some(lc) => { + lc => { self.handle_qs_hd_generic( false, "\"", @@ -2097,30 +2088,16 @@ impl<'input> Lexer<'input> { Token::StringLiteral, )?; } - None => { - let token_str = self - .slice_until_eol(&total_start) - .map_or_else(|| format!("\"{}", string), ToString::to_string); - let mut token_end = total_start; - token_end.shift_str(&token_str); - let range = Range::from((total_start, end)); - return Err(ErrorKind::UnterminatedStringLiteral( - range.expand_lines(2), - range, - UnfinishedToken::new(Range::from((total_start, token_end)), token_str), - ) - .into()); - } } } } fn test_escape_code(&mut self, start: &Location, s: &str) -> Result> { - match self.bump() { - Some((_, '\\')) => Ok(Some('\\')), - Some((_, '|')) => Ok(Some('|')), - Some((_end, '\n')) => Ok(None), - Some((end, ch)) => { + match self.bump().ok_or(ErrorKind::UnexpectedEndOfStream)? { + (_, '\\') => Ok(Some('\\')), + (_, '|') => Ok(Some('|')), + (_end, '\n') => Ok(None), + (end, ch) => { let token_str = format!("|{}\\{}", s, ch); let mut token_end = end; token_end.shift(ch); @@ -2133,28 +2110,42 @@ impl<'input> Lexer<'input> { ) .into()) } - None => Err(ErrorKind::UnexpectedEndOfStream.into()), } } + fn unfinished_token(&self, pfx: &str, string: &str, start: Location) -> ErrorKind { + let token_str = self + .slice_until_eol(&start) + .map_or_else(|| format!("{}{}", pfx, string), ToString::to_string); + let mut token_end = start; + token_end.shift_str(&token_str); + let range = Range::from((start, token_end)); + ErrorKind::UnterminatedExtractor( + range.expand_lines(2), + range, + UnfinishedToken::new(Range::from((start, token_end)), token_str), + ) + } + /// handle test/extractor '|...' - fn pl(&mut self, start: Location) -> Result> { + fn pl(&mut self, total_start: Location) -> Result> { let mut string = String::new(); let mut strings = Vec::new(); - let mut end = start; loop { - match self.bump() { - Some((e, '\\')) => { - if let Some(ch) = self.test_escape_code(&start, &string)? { + let next = self + .bump() + .ok_or_else(|| self.unfinished_token("|", &string, total_start))?; + match next { + (_e, '\\') => { + if let Some(ch) = self.test_escape_code(&total_start, &string)? { string.push(ch); } else { strings.push(string); string = String::new(); } - end = e; } - Some((mut end, '|')) => { + (mut end, '|') => { strings.push(string); end.shift('|'); let indent = indentation(&strings); @@ -2169,13 +2160,13 @@ impl<'input> Lexer<'input> { }) .collect(); let token = Token::TestLiteral(indent, strings); - return Ok(self.spanned2(start, end, token)); + return Ok(self.spanned2(total_start, end, token)); } - Some((end, '\n')) => { + (end, '\n') => { let token_str = self - .slice_until_eol(&start) + .slice_until_eol(&total_start) .map_or_else(|| format!("|{}", string), ToString::to_string); - let range = Range::from((start, end)); + let range = Range::from((total_start, end)); return Err(ErrorKind::UnterminatedExtractor( range.expand_lines(2), range, @@ -2183,100 +2174,33 @@ impl<'input> Lexer<'input> { ) .into()); } - Some((e, other)) => { + (_e, other) => { string.push(other as char); - end = e; continue; } - None => { - let token_str = self - .slice_until_eol(&start) - .map_or_else(|| format!("|{}", string), ToString::to_string); - let mut token_end = start; - token_end.shift_str(&token_str); - end.shift(' '); - let range = Range::from((start, end)); - return Err(ErrorKind::UnterminatedExtractor( - range.expand_lines(2), - range, - UnfinishedToken::new(Range::from((start, token_end)), token_str), - ) - .into()); - } } } } - /// handle numbers (with or without leading '-') - #[allow(clippy::too_many_lines)] - fn nm(&mut self, start: Location) -> Result> { - let (end, int) = self.extract_number(start, is_dec_digit); - let (start, end, token) = match self.lookahead() { - Some((_, '.')) => { - self.bump(); // Skip '.' - let (end, float) = self.extract_number(start, is_dec_digit); - match self.lookahead() { - Some((_, 'e')) => { - self.bump(); - if let Some((exp_location, _)) = self.bump() { - // handle sign - let (exp_location, sign) = match self.lookahead() { - Some((loc, '+')) | Some((loc, '-')) => { - self.bump(); - self.slice(exp_location, loc).map(|s| (loc, s)) - } - _ => Some((exp_location, "")), - } - .unwrap_or((exp_location, "")); - let (end, exp) = self.extract_number(exp_location, is_dec_digit); - let float = &format!("{}e{}{}", float, sign, exp); - ( - start, - end, - Token::FloatLiteral( - float.parse().chain_err(|| { - ErrorKind::InvalidFloatLiteral( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start).map_or_else( - || float.to_string(), - ToString::to_string, - ), - ), - ) - })?, - float.to_string(), - ), - ) - } else { - return Err(ErrorKind::InvalidFloatLiteral( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start) - .map_or_else(|| float.to_string(), ToString::to_string), - ), - ) - .into()); + fn nm_float(&mut self, start: Location, int: &str) -> Result> { + self.bump(); // Skip '.' + let (end, float) = self.extract_number(start, is_dec_digit); + match self.lookahead() { + Some((_, 'e')) => { + self.bump(); + if let Some((exp_location, _)) = self.bump() { + // handle sign + let (exp_location, sign) = match self.lookahead() { + Some((loc, '+')) | Some((loc, '-')) => { + self.bump(); + self.slice(exp_location, loc).map(|s| (loc, s)) } + _ => Some((exp_location, "")), } - Some((end, ch)) if is_ident_start(ch) => { - return Err(ErrorKind::UnexpectedCharacter( - Range::from((start, end)).expand_lines(2), - Range::from((end, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start) - .map_or_else(|| int.to_string(), ToString::to_string), - ), - ch, - ) - .into()); - } - _ => ( + .unwrap_or((exp_location, "")); + let (end, exp) = self.extract_number(exp_location, is_dec_digit); + let float = &format!("{}e{}{}", float, sign, exp); + Ok(self.spanned2( start, end, Token::FloatLiteral( @@ -2293,95 +2217,137 @@ impl<'input> Lexer<'input> { })?, float.to_string(), ), - ), + )) + } else { + Err(ErrorKind::InvalidFloatLiteral( + Range::from((start, end)).expand_lines(2), + Range::from((start, end)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| float.to_string(), ToString::to_string), + ), + ) + .into()) } } - Some((_, 'x')) => { - self.bump(); // Skip 'x' - let int_start = self.next_index()?; - let (end, hex) = self.extract_number(int_start, is_hex); - // ALLOW: this takes the whole string and can not panic - match &int[..] { - "0" | "-0" => match self.lookahead() { - Some((_, ch)) if is_ident_start(ch) => { - return Err(ErrorKind::UnexpectedCharacter( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start) - .map_or_else(|| hex.to_string(), ToString::to_string), - ), - ch, - ) - .into()); - } - _ => { - if hex.is_empty() { - return Err(ErrorKind::InvalidHexLiteral( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start) - .map_or_else(|| hex.to_string(), ToString::to_string), - ), - ) - .into()); - } - let is_positive = int == "0"; - // ALLOW: this takes the whole string and can not panic - match i64_from_hex(&hex[..], is_positive) { - Ok(val) => (start, end, Token::IntLiteral(val)), - Err(_err) => { - return Err(ErrorKind::InvalidHexLiteral( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - UnfinishedToken::new( - Range::from((start, end)), - self.slice_until_eol(&start).map_or_else( - || hex.to_string(), - ToString::to_string, - ), - ), - ) - .into()); - } - } - } - }, - _ => { - return Err(ErrorKind::InvalidHexLiteral( + Some((end, ch)) if is_ident_start(ch) => Err(ErrorKind::UnexpectedCharacter( + Range::from((start, end)).expand_lines(2), + Range::from((end, end)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| int.to_string(), ToString::to_string), + ), + ch, + ) + .into()), + _ => Ok(self.spanned2( + start, + end, + Token::FloatLiteral( + float.parse().chain_err(|| { + ErrorKind::InvalidFloatLiteral( Range::from((start, end)).expand_lines(2), Range::from((start, end)), UnfinishedToken::new( Range::from((start, end)), self.slice_until_eol(&start) - .map_or_else(|| int.to_string(), ToString::to_string), + .map_or_else(|| float.to_string(), ToString::to_string), ), ) - .into()); - } - } - } - Some((char_loc, ch)) if is_ident_start(ch) => { - return Err(ErrorKind::UnexpectedCharacter( + })?, + float.to_string(), + ), + )), + } + } + + fn nm_hex(&mut self, start: Location, int: &str) -> Result> { + self.bump(); // Skip 'x' + let int_start = self.next_index()?; + let (end, hex) = self.extract_number(int_start, is_hex); + // ALLOW: this takes the whole string and can not panic + match int { + "0" | "-0" => match self.lookahead() { + Some((_, ch)) if is_ident_start(ch) => Err(ErrorKind::UnexpectedCharacter( Range::from((start, end)).expand_lines(2), - Range::from((char_loc, char_loc)), + Range::from((start, end)), UnfinishedToken::new( Range::from((start, end)), self.slice_until_eol(&start) - .map_or_else(|| int.to_string(), ToString::to_string), + .map_or_else(|| hex.to_string(), ToString::to_string), ), ch, ) - .into()); - } - None | Some(_) => { - if let Ok(val) = int.parse() { - (start, end, Token::IntLiteral(val)) - } else { - return Err(ErrorKind::InvalidIntLiteral( + .into()), + _ => { + if hex.is_empty() { + Err(ErrorKind::InvalidHexLiteral( + Range::from((start, end)).expand_lines(2), + Range::from((start, end)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| hex.to_string(), ToString::to_string), + ), + ) + .into()) + } else { + let is_positive = int == "0"; + // ALLOW: this takes the whole string and can not panic + match i64_from_hex(&hex[..], is_positive) { + Ok(val) => Ok(self.spanned2(start, end, Token::IntLiteral(val))), + Err(_err) => Err(ErrorKind::InvalidHexLiteral( + Range::from((start, end)).expand_lines(2), + Range::from((start, end)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| hex.to_string(), ToString::to_string), + ), + ) + .into()), + } + } + } + }, + _ => Err(ErrorKind::InvalidHexLiteral( + Range::from((start, end)).expand_lines(2), + Range::from((start, end)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| int.to_string(), ToString::to_string), + ), + ) + .into()), + } + } + + /// handle numbers (with or without leading '-') + #[allow(clippy::too_many_lines)] + fn nm(&mut self, start: Location) -> Result> { + let (end, int) = self.extract_number(start, is_dec_digit); + match self.lookahead() { + Some((_, '.')) => self.nm_float(start, &int), + Some((_, 'x')) => self.nm_hex(start, &int), + Some((char_loc, ch)) if is_ident_start(ch) => Err(ErrorKind::UnexpectedCharacter( + Range::from((start, end)).expand_lines(2), + Range::from((char_loc, char_loc)), + UnfinishedToken::new( + Range::from((start, end)), + self.slice_until_eol(&start) + .map_or_else(|| int.to_string(), ToString::to_string), + ), + ch, + ) + .into()), + None | Some(_) => int + .parse() + .map(|val| self.spanned2(start, end, Token::IntLiteral(val))) + .map_err(|_| { + Error::from(ErrorKind::InvalidIntLiteral( Range::from((start, end)).expand_lines(2), Range::from((start, end)), UnfinishedToken::new( @@ -2389,13 +2355,9 @@ impl<'input> Lexer<'input> { self.slice_until_eol(&start) .map_or_else(|| int.to_string(), ToString::to_string), ), - ) - .into()); - } - } - }; - - Ok(self.spanned2(start, end, token)) + )) + }), + } } /// Consume whitespace @@ -2421,66 +2383,61 @@ impl<'input> Iterator for Lexer<'input> { if let Some(next) = self.stored_tokens.pop_front() { return Some(Ok(next)); } - let lexeme = self.bump(); - match lexeme { - None => None, - Some((start, ch)) => { - match ch as char { - // '...' => Some(Ok(self.spanned2(start, self.next_index(), Token::DotDotDot))), - // ".." => Some(Ok(self.spanned2(start, self.next_index(), Token::DotDot))), - ',' => Some(Ok(self.spanned2(start, start + ch, Token::Comma))), - '$' => Some(Ok(self.spanned2(start, start + ch, Token::Dollar))), - '.' => Some(Ok(self.spanned2(start, start + ch, Token::Dot))), - // '?' => Some(Ok(self.spanned2(start, start, Token::Question))), - '_' => Some(Ok(self.spanned2(start, start + ch, Token::DontCare))), - ';' => Some(Ok(self.spanned2(start, start + ch, Token::Semi))), - '+' => Some(Ok(self.spanned2(start, start + ch, Token::Add))), - '*' => Some(Ok(self.spanned2(start, start + ch, Token::Mul))), - '\\' => Some(Ok(self.spanned2(start, start + ch, Token::BSlash))), - '(' => Some(Ok(self.spanned2(start, start + ch, Token::LParen))), - ')' => Some(Ok(self.spanned2(start, start + ch, Token::RParen))), - '{' => Some(Ok(self.spanned2(start, start + ch, Token::LBrace))), - '}' => Some(Ok(self.spanned2(start, start + ch, Token::RBrace))), - '[' => Some(Ok(self.spanned2(start, start + ch, Token::LBracket))), - ']' => Some(Ok(self.spanned2(start, start + ch, Token::RBracket))), - '/' => Some(Ok(self.spanned2(start, start + ch, Token::Div))), - // TODO account for extractors which use | to mark format boundaries - //'|' => Some(Ok(self.spanned2(start, start, Token::BitOr))), - '^' => Some(Ok(self.spanned2(start, start + ch, Token::BitXor))), - '&' => Some(Ok(self.spanned2(start, start + ch, Token::BitAnd))), - ':' => Some(Ok(self.cn(start))), - '-' => match self.lookahead() { - Some((_loc, c)) if is_dec_digit(c) => Some(self.nm(start)), - _ => Some(Ok(self.spanned2(start, start + ch, Token::Sub))), - }, - '#' => Some(self.cx(start)), - '=' => Some(Ok(self.eq(start))), - '<' => Some(Ok(self.lt(start))), - '>' => Some(Ok(self.gt(start))), - '%' => Some(self.pb(start)), - '~' => Some(self.tl(start)), - '`' => Some(self.id2(start)), - // TODO account for bitwise not operator - '!' => Some(self.pe(start)), - '\n' => Some(Ok(self.spanned2(start, start, Token::NewLine))), - ch if is_ident_start(ch) => Some(Ok(self.id(start))), - '"' => match self.qs_or_hd(start) { - Ok(mut tokens) => { - for t in tokens.drain(..) { - self.stored_tokens.push_back(t) - } - self.next() - } - Err(e) => Some(Err(e)), - }, - ch if is_test_start(ch) => Some(self.pl(start)), - ch if is_dec_digit(ch) => Some(self.nm(start)), - ch if ch.is_whitespace() => Some(Ok(self.ws(start))), - _ => { - let str = format!("{}", ch); - Some(Ok(self.spanned2(start, start, Token::Bad(str)))) + let (start, ch) = self.bump()?; + match ch as char { + // '...' => Some(Ok(self.spanned2(start, self.next_index(), Token::DotDotDot))), + // ".." => Some(Ok(self.spanned2(start, self.next_index(), Token::DotDot))), + ',' => Some(Ok(self.spanned2(start, start + ch, Token::Comma))), + '$' => Some(Ok(self.spanned2(start, start + ch, Token::Dollar))), + '.' => Some(Ok(self.spanned2(start, start + ch, Token::Dot))), + // '?' => Some(Ok(self.spanned2(start, start, Token::Question))), + '_' => Some(Ok(self.spanned2(start, start + ch, Token::DontCare))), + ';' => Some(Ok(self.spanned2(start, start + ch, Token::Semi))), + '+' => Some(Ok(self.spanned2(start, start + ch, Token::Add))), + '*' => Some(Ok(self.spanned2(start, start + ch, Token::Mul))), + '\\' => Some(Ok(self.spanned2(start, start + ch, Token::BSlash))), + '(' => Some(Ok(self.spanned2(start, start + ch, Token::LParen))), + ')' => Some(Ok(self.spanned2(start, start + ch, Token::RParen))), + '{' => Some(Ok(self.spanned2(start, start + ch, Token::LBrace))), + '}' => Some(Ok(self.spanned2(start, start + ch, Token::RBrace))), + '[' => Some(Ok(self.spanned2(start, start + ch, Token::LBracket))), + ']' => Some(Ok(self.spanned2(start, start + ch, Token::RBracket))), + '/' => Some(Ok(self.spanned2(start, start + ch, Token::Div))), + // TODO account for extractors which use | to mark format boundaries + //'|' => Some(Ok(self.spanned2(start, start, Token::BitOr))), + '^' => Some(Ok(self.spanned2(start, start + ch, Token::BitXor))), + '&' => Some(Ok(self.spanned2(start, start + ch, Token::BitAnd))), + ':' => Some(Ok(self.cn(start))), + '-' => match self.lookahead() { + Some((_loc, c)) if is_dec_digit(c) => Some(self.nm(start)), + _ => Some(Ok(self.spanned2(start, start + ch, Token::Sub))), + }, + '#' => Some(self.cx(start)), + '=' => Some(Ok(self.eq(start))), + '<' => Some(Ok(self.lt(start))), + '>' => Some(Ok(self.gt(start))), + '%' => Some(self.pb(start)), + '~' => Some(self.tl(start)), + '`' => Some(self.id2(start)), + // TODO account for bitwise not operator + '!' => Some(self.pe(start)), + '\n' => Some(Ok(self.spanned2(start, start, Token::NewLine))), + ch if is_ident_start(ch) => Some(Ok(self.id(start))), + '"' => match self.qs_or_hd(start) { + Ok(mut tokens) => { + for t in tokens.drain(..) { + self.stored_tokens.push_back(t) } + self.next() } + Err(e) => Some(Err(e)), + }, + ch if is_test_start(ch) => Some(self.pl(start)), + ch if is_dec_digit(ch) => Some(self.nm(start)), + ch if ch.is_whitespace() => Some(Ok(self.ws(start))), + _ => { + let str = format!("{}", ch); + Some(Ok(self.spanned2(start, start, Token::Bad(str)))) } } } From 429511fa28b30c756f2b13bfab2830db01e6cb45 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 17 May 2021 14:21:42 +0200 Subject: [PATCH 7/8] Better coverage for aggregate functions Signed-off-by: Heinz N. Gies --- CHANGELOG.md | 2 + tremor-script/src/registry.rs | 18 +- tremor-script/src/std_lib/stats.rs | 403 ++++++++++++++++++----------- tremor-script/src/std_lib/win.rs | 36 +-- 4 files changed, 277 insertions(+), 182 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 321a44f88f..35ca090b7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ - Include `cncf::otel` stdlib sources in deb package - Add `/usr/local/share/tremor` to default `TREMOR_PATH` also for all packages as a well-known directory for custom tremor-script libraries and modules. - Record the partition number assigned during rebalancing when running Kafka. +- Fix bug in HDR histogram implementation when using emit without reset. +- Fix bug in mean that invalid values would be counted as part of the total number of values. ## 0.11.1 diff --git a/tremor-script/src/registry.rs b/tremor-script/src/registry.rs index 88fa92ada9..0b010b031f 100644 --- a/tremor-script/src/registry.rs +++ b/tremor-script/src/registry.rs @@ -37,7 +37,7 @@ pub trait TremorAggrFn: DowncastSync + Sync + Send { /// /// # Errors /// if the function can not compensate the data - fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()>; + // fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()>; /// Emits the function /// /// # Errors @@ -706,14 +706,14 @@ impl TremorAggrFnWrapper { self.fun.accumulate(args) } - /// Compensate for a value being removed - /// - /// # Errors - /// if compensating the function fails - #[cfg(not(tarpaulin_include))] // just a passthrough - pub fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - self.fun.compensate(args) - } + // /// Compensate for a value being removed + // /// + // /// # Errors + // /// if compensating the function fails + // #[cfg(not(tarpaulin_include))] // just a passthrough + // pub fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { + // self.fun.compensate(args) + // } /// Emits the function /// diff --git a/tremor-script/src/std_lib/stats.rs b/tremor-script/src/std_lib/stats.rs index 284f298a31..f14b2a9071 100644 --- a/tremor-script/src/std_lib/stats.rs +++ b/tremor-script/src/std_lib/stats.rs @@ -51,10 +51,10 @@ impl TremorAggrFn for Count { self.0 += 1; Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - self.0 -= 1; - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // self.0 -= 1; + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::from(self.0)) } @@ -69,6 +69,7 @@ impl TremorAggrFn for Count { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -93,19 +94,19 @@ impl TremorAggrFn for Sum { }, ) } - fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - args.first().cast_f64().map_or_else( - || { - Err(FunctionError::BadType { - mfa: mfa("stats", "sum", 1), - }) - }, - |v| { - self.0 -= v; - Ok(()) - }, - ) - } + // fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { + // args.first().cast_f64().map_or_else( + // || { + // Err(FunctionError::BadType { + // mfa: mfa("stats", "sum", 1), + // }) + // }, + // |v| { + // self.0 -= v; + // Ok(()) + // }, + // ) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::from(self.0)) } @@ -120,6 +121,7 @@ impl TremorAggrFn for Sum { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -132,7 +134,6 @@ impl TremorAggrFn for Sum { struct Mean(i64, f64); impl TremorAggrFn for Mean { fn accumulate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - self.0 += 1; args.first().cast_f64().map_or_else( || { Err(FunctionError::BadType { @@ -141,24 +142,25 @@ impl TremorAggrFn for Mean { }, |v| { self.1 += v; + self.0 += 1; Ok(()) }, ) } - fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - self.0 -= 1; - args.first().cast_f64().map_or_else( - || { - Err(FunctionError::BadType { - mfa: mfa("stats", "mean", 1), - }) - }, - |v| { - self.1 -= v; - Ok(()) - }, - ) - } + // fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { + // self.0 -= 1; + // args.first().cast_f64().map_or_else( + // || { + // Err(FunctionError::BadType { + // mfa: mfa("stats", "mean", 1), + // }) + // }, + // |v| { + // self.1 -= v; + // Ok(()) + // }, + // ) + // } fn emit<'event>(&mut self) -> FResult> { if self.0 == 0 { Ok(Value::null()) @@ -179,6 +181,7 @@ impl TremorAggrFn for Mean { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -205,24 +208,24 @@ impl TremorAggrFn for Min { }, ) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - // [a, b, c, d, e, f, g, h, i, j]; - // a -> [(0, a)] - // b -> [(0, a), (1, b)] - // c -> [(0, a), (1, b), (2, c)] ... - - // [d, t, u, a, t, u, b, c, 6]; - // d -> [(0, d)] - // t -> [(0, d), (1, t)] - // u -> [(0, d), (1, t), (2, u)] ... - // a -> [(3, a)] ... - // t -> [(3, a), (4, t)] ... - // u -> [(3, a), (4, t), (5, u)] ... - // b -> [(3, a), (5, b)] ... + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // // [a, b, c, d, e, f, g, h, i, j]; + // // a -> [(0, a)] + // // b -> [(0, a), (1, b)] + // // c -> [(0, a), (1, b), (2, c)] ... - Ok(()) - } + // // [d, t, u, a, t, u, b, c, 6]; + // // d -> [(0, d)] + // // t -> [(0, d), (1, t)] + // // u -> [(0, d), (1, t), (2, u)] ... + // // a -> [(3, a)] ... + // // t -> [(3, a), (4, t)] ... + // // u -> [(3, a), (4, t), (5, u)] ... + // // b -> [(3, a), (5, b)] ... + + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::from(self.0.unwrap_or_default())) } @@ -239,6 +242,7 @@ impl TremorAggrFn for Min { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -265,10 +269,10 @@ impl TremorAggrFn for Max { }, ) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::from(self.0.unwrap_or_default())) } @@ -285,6 +289,7 @@ impl TremorAggrFn for Max { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -320,21 +325,21 @@ impl TremorAggrFn for Var { }, ) } - fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - args.first().cast_f64().map_or_else( - || { - Err(FunctionError::BadType { - mfa: mfa("stats", "var", 1), - }) - }, - |v| { - self.n -= 1; - self.ex -= v - self.k; - self.ex2 -= (v - self.k) * (v - self.k); - Ok(()) - }, - ) - } + // fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { + // args.first().cast_f64().map_or_else( + // || { + // Err(FunctionError::BadType { + // mfa: mfa("stats", "var", 1), + // }) + // }, + // |v| { + // self.n -= 1; + // self.ex -= v - self.k; + // self.ex2 -= (v - self.k) * (v - self.k); + // Ok(()) + // }, + // ) + // } fn emit<'event>(&mut self) -> FResult> { if self.n == 0 { Ok(Value::from(0.0)) @@ -360,6 +365,7 @@ impl TremorAggrFn for Var { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -375,9 +381,9 @@ impl TremorAggrFn for Stdev { fn accumulate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { self.0.accumulate(args) } - fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - self.0.compensate(args) - } + // fn compensate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { + // self.0.compensate(args) + // } fn emit<'event>(&mut self) -> FResult> { self.0 .emit() @@ -394,6 +400,7 @@ impl TremorAggrFn for Stdev { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -453,8 +460,8 @@ impl std::default::Default for Dds { impl TremorAggrFn for Dds { fn accumulate<'event>(&mut self, args: &[&Value<'event>]) -> FResult<()> { - if let Some(vals) = args.get(1).as_array() { - if !self.percentiles_set { + if !self.percentiles_set { + if let Some(vals) = args.get(1).as_array() { let percentiles: FResult> = vals .iter() .flat_map(|v| v.as_str().map(String::from)) @@ -498,7 +505,7 @@ impl TremorAggrFn for Dds { error: e.to_string(), } } - let mut p = hashmap! {}; + let mut p = Value::object_with_capacity(self.percentiles.len() + 3); let histo = if let Some(histo) = self.histo.as_ref() { histo } else { @@ -518,20 +525,16 @@ impl TremorAggrFn for Dds { let count = histo.count(); let (min, max, sum) = if count == 0 { for (pcn, _percentile) in &self.percentiles { - p.insert(pcn.clone().into(), Value::from(0.0)); + p.try_insert(pcn.clone(), 0.0); } - (0_f64, f64::MAX, 0_f64) + (0_f64, 0_f64, 0_f64) } else { for (pcn, percentile) in &self.percentiles { - if let Ok(Some(quantile)) = histo.quantile(*percentile) { - let quantile_dsp = ceil(quantile, 1); // Round for equiv with HDR ( 2 digits ) - p.insert(pcn.clone().into(), Value::from(quantile_dsp)); - } else { - return Err(err(&format!( - "Unable to calculate percentile '{}'", - *percentile - ))); - } + let quantile = histo.quantile(*percentile).ok().flatten().ok_or_else(|| { + err(&format!("Unable to calculate percentile '{}'", *percentile)) + })?; + let quantile_dsp = ceil(quantile, 1); // Round for equiv with HDR ( 2 digits ) + p.try_insert(pcn.clone(), quantile_dsp); } ( histo.min().ok_or_else(|| err(&"Unable to calculate min"))?, @@ -539,19 +542,19 @@ impl TremorAggrFn for Dds { histo.sum().ok_or_else(|| err(&"Unable to calculate sum"))?, ) }; - Ok(Value::from(hashmap! { - "count".into() => Value::from(count), - "min".into() => Value::from(min), - "max".into() => Value::from(max), - "mean".into() => Value::from(sum / count as f64), - "percentiles".into() => Value::from(p), - })) + let mut res = Value::object_with_capacity(5); + res.try_insert("count", count); + res.try_insert("min", min); + res.try_insert("max", max); + res.try_insert("mean", sum / count as f64); + res.try_insert("percentiles", p); + Ok(res) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO there's no facility for this with dds histogram, punt for now - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO there's no facility for this with dds histogram, punt for now + // Ok(()) + // } fn merge(&mut self, src: &dyn TremorAggrFn) -> FResult<()> { let other: Option<&Self> = src.downcast_ref::(); @@ -611,6 +614,7 @@ impl TremorAggrFn for Dds { self.histo = None; self.cache.clear(); } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -625,7 +629,7 @@ struct Hdr { cache: Vec, percentiles: Vec<(String, f64)>, percentiles_set: bool, - max: u64, + high_bound: u64, } const HIST_MAX_CACHE_SIZE: usize = 8192; @@ -646,14 +650,14 @@ impl std::default::Default for Hdr { ("0.99999".to_string(), 0.99999), ], percentiles_set: false, - max: 0, + high_bound: 0, } } } impl Hdr { - fn max(&self) -> u64 { - max(self.max, 4) + fn high_bound(&self) -> u64 { + max(self.high_bound, 4) } } impl TremorAggrFn for Hdr { @@ -688,15 +692,17 @@ impl TremorAggrFn for Hdr { error: format!("failed to record value: {:?}", e), })?; } else { - if v > self.max { - self.max = v; + if v > self.high_bound { + self.high_bound = v; } self.cache.push(v); if self.cache.len() == HIST_MAX_CACHE_SIZE { - let mut histo: Histogram = Histogram::new_with_bounds(1, self.max(), 2) - .map_err(|e| FunctionError::RuntimeError { - mfa: mfa("stats", "hdr", 2), - error: format!("failed to allocate hdr storage: {:?}", e), + let mut histo: Histogram = + Histogram::new_with_bounds(1, self.high_bound(), 2).map_err(|e| { + FunctionError::RuntimeError { + mfa: mfa("stats", "hdr", 2), + error: format!("failed to allocate hdr storage: {:?}", e), + } })?; histo.auto(true); for v in self.cache.drain(..) { @@ -719,6 +725,7 @@ impl TremorAggrFn for Hdr { self.percentiles = other.percentiles.clone(); self.percentiles_set = true; }; + self.high_bound = max(self.high_bound, other.high_bound); if let Some(ref mut histo) = self.histo { // If this is a histogram and we merge @@ -753,9 +760,9 @@ impl TremorAggrFn for Hdr { // If both are caches if self.cache.len() + other.cache.len() > HIST_MAX_CACHE_SIZE { // If the cache size exceeds our maximal cache size drain them into a histogram - self.max = max(self.max, other.max); + self.high_bound = max(self.high_bound, other.high_bound); let mut histo: Histogram = - Histogram::new_with_bounds(1, self.max(), 2).map_err(|e| { + Histogram::new_with_bounds(1, self.high_bound(), 2).map_err(|e| { FunctionError::RuntimeError { mfa: mfa("stats", "hdr", 2), error: format!("failed to init historgrams: {:?}", e), @@ -778,7 +785,6 @@ impl TremorAggrFn for Hdr { } else { // If not append it's cache self.cache.extend(&other.cache); - self.max = max(self.max, other.max); } } } @@ -786,10 +792,10 @@ impl TremorAggrFn for Hdr { Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO there's no facility for this with hdr histogram, punt for now - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO there's no facility for this with hdr histogram, punt for now + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { let mut p = hashmap! {}; if let Some(histo) = &self.histo { @@ -809,12 +815,10 @@ impl TremorAggrFn for Hdr { "percentiles".into() => Value::from(p), })) } else { - let mut histo: Histogram = - Histogram::new_with_bounds(1, self.max(), 2).map_err(|e| { - FunctionError::RuntimeError { - mfa: mfa("stats", "hdr", 2), - error: format!("failed to init historgrams: {:?}", e), - } + let mut histo: Histogram = Histogram::new_with_bounds(1, self.high_bound(), 2) + .map_err(|e| FunctionError::RuntimeError { + mfa: mfa("stats", "hdr", 2), + error: format!("failed to init historgrams: {:?}", e), })?; histo.auto(true); for v in self.cache.drain(..) { @@ -829,7 +833,7 @@ impl TremorAggrFn for Hdr { Value::from(histo.value_at_percentile(percentile * 100.0)), ); } - Ok(Value::from(hashmap! { + let res = Value::from(hashmap! { "count".into() => Value::from(histo.len()), "min".into() => Value::from(histo.min()), "max".into() => Value::from(histo.max()), @@ -837,14 +841,17 @@ impl TremorAggrFn for Hdr { "stdev".into() => Value::from(histo.stdev()), "var".into() => Value::from(histo.stdev().powf(2.0)), "percentiles".into() => Value::from(p), - })) + }); + self.histo = Some(histo); + Ok(res) } } fn init(&mut self) { self.histo = None; - self.max = 0; + self.high_bound = 0; self.cache.clear(); } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -928,32 +935,54 @@ mod test { #[test] fn min() -> Result<()> { + let mut a = Min::default(); + a.init(); + let one = Value::from(1); let two = Value::from(2); let three = Value::from(3); - let mut a = Min::default(); - a.init(); + + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); + a.accumulate(&[&one])?; a.accumulate(&[&two])?; a.accumulate(&[&three])?; + assert_eq!(a.emit()?, 1.0); let mut b = Min::default(); b.init(); b.merge(&a)?; assert_eq!(a.emit()?, 1.0); + + assert_eq!(a.arity(), 1..=1); + Ok(()) } #[test] fn max() -> Result<()> { let mut a = Max::default(); a.init(); + let one = Value::from(1); let two = Value::from(2); let three = Value::from(3); + + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); + a.accumulate(&[&one])?; a.accumulate(&[&two])?; a.accumulate(&[&three])?; + assert_eq!(a.emit()?, 3.0); + let mut b = Max::default(); + b.init(); + b.merge(&a)?; + assert_eq!(a.emit()?, 3.0); + + assert_eq!(a.arity(), 1..=1); + Ok(()) } @@ -964,10 +993,22 @@ mod test { let one = Value::from(1); let two = Value::from(2); let three = Value::from(3); + let four = Value::from(4); + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); + a.accumulate(&[&one])?; a.accumulate(&[&two])?; a.accumulate(&[&three])?; assert_eq!(a.emit()?, 6.0); + + let mut b = Mean::default(); + b.init(); + a.accumulate(&[&four])?; + a.merge(&b)?; + assert_eq!(a.emit()?, 10.0); + + assert_eq!(a.arity(), 1..=1); Ok(()) } @@ -975,13 +1016,25 @@ mod test { fn mean() -> Result<()> { let mut a = Mean::default(); a.init(); + assert_eq!(a.emit()?, ()); let one = Value::from(1); let two = Value::from(2); let three = Value::from(3); + let four = Value::from(4); + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); a.accumulate(&[&one])?; a.accumulate(&[&two])?; a.accumulate(&[&three])?; assert_eq!(a.emit()?, 2.0); + + let mut b = Mean::default(); + b.init(); + a.accumulate(&[&four])?; + a.merge(&b)?; + assert_eq!(a.emit()?, 2.5); + + assert_eq!(a.arity(), 1..=1); Ok(()) } @@ -989,10 +1042,15 @@ mod test { fn variance() -> Result<()> { let mut a = Var::default(); a.init(); + + assert_eq!(a.emit()?, 0.0); + let two = Value::from(2); let four = Value::from(4); let nineteen = Value::from(19); let nine = Value::from(9); + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); a.accumulate(&[&two])?; a.accumulate(&[&four])?; a.accumulate(&[&nineteen])?; @@ -1019,6 +1077,8 @@ mod test { let r = b.emit()?.cast_f64().expect("screw it"); assert!(approx_eq!(f64, dbg!(r), 33.928_571_428_571_43)); + assert_eq!(a.arity(), 1..=1); + Ok(()) } @@ -1029,10 +1089,21 @@ mod test { let two = Value::from(2); let four = Value::from(4); let nineteen = Value::from(19); + + let err = Value::null(); + assert!(a.accumulate(&[&err]).is_err()); + a.accumulate(&[&two])?; a.accumulate(&[&four])?; a.accumulate(&[&nineteen])?; assert!((a.emit()?.cast_f64().expect("screw it") - (259.0 as f64 / 3.0).sqrt()) < 0.001); + + let mut b = Stdev::default(); + b.merge(&a)?; + assert!((b.emit()?.cast_f64().expect("screw it") - (259.0 as f64 / 3.0).sqrt()) < 0.001); + + assert_eq!(a.arity(), 1..=1); + Ok(()) } @@ -1040,19 +1111,21 @@ mod test { fn hdr() -> Result<()> { let mut a = Hdr::default(); a.init(); - let mut i = 1; - loop { - if i > 100 { - break; - } + assert!(a + .accumulate(&[ + &Value::from(0), + &literal!(["snot", "0.9", "0.95", "0.99", "0.999", "0.9999"]), + ]) + .is_err()); + + for i in 1..=100 { a.accumulate(&[ &Value::from(i), - &Value::from(vec!["0.5", "0.9", "0.95", "0.99", "0.999", "0.9999"]), + &literal!(["0.5", "0.9", "0.95", "0.99", "0.999", "0.9999"]), ])?; - i += 1; } - let v = a.emit()?; + let e = literal!({ "min": 1, "max": 100, @@ -1069,7 +1142,15 @@ mod test { "0.9999": 100 } }); - assert_eq!(v, e); + assert_eq!(a.emit()?, e); + + let mut b = Hdr::default(); + b.init(); + b.merge(&a)?; + assert_eq!(b.emit()?, e); + + assert_eq!(a.arity(), 1..=2); + Ok(()) } @@ -1079,36 +1160,44 @@ mod test { let mut a = Dds::default(); a.init(); - let mut i = 1; - loop { - if i > 100 { - break; - } + assert!(a + .accumulate(&[ + &Value::from(0), + &literal!(["snot", "0.9", "0.95", "0.99", "0.999", "0.9999"]), + ]) + .is_err()); + + for i in 1..=100 { a.accumulate(&[ &Value::from(i), - &Value::from(vec!["0.5", "0.9", "0.95", "0.99", "0.999", "0.9999"]), + &literal!(["0.5", "0.9", "0.95", "0.99", "0.999", "0.9999"]), ])?; - i += 1; } - let v = a.emit()?; + let e = literal!({ - "min": 1.0, - "max": 100.0, - "count": 100, - "mean": 50.5, - // "stdev": 28.866_070_047_722_12, - // "var": 833.25, - "percentiles": { - "0.5": 50.0, - "0.9": 89.2, - "0.95": 94.7, - "0.99": 98.6, - "0.999": 98.6, - "0.9999": 98.6, - } - }); - assert_eq!(v, e); + "min": 1.0, + "max": 100.0, + "count": 100, + "mean": 50.5, + "percentiles": { + "0.5": 50.0, + "0.9": 89.2, + "0.95": 94.7, + "0.99": 98.6, + "0.999": 98.6, + "0.9999": 98.6, + } + }); + assert_eq!(a.emit()?, e); + + let mut b = Dds::default(); + b.init(); + b.merge(&a)?; + assert_eq!(b.emit()?, e); + + assert_eq!(a.arity(), 1..=2); + Ok(()) } } diff --git a/tremor-script/src/std_lib/win.rs b/tremor-script/src/std_lib/win.rs index 3291ed435b..997eef4660 100644 --- a/tremor-script/src/std_lib/win.rs +++ b/tremor-script/src/std_lib/win.rs @@ -27,10 +27,10 @@ impl TremorAggrFn for First { } Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(self.0.as_ref().map_or_else(Value::null, Clone::clone)) } @@ -52,6 +52,7 @@ impl TremorAggrFn for First { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -67,10 +68,10 @@ impl TremorAggrFn for Last { self.0 = args.first().map(|v| (*v).clone_static()); Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(self.0.as_ref().map_or_else(Value::null, Clone::clone)) } @@ -92,6 +93,7 @@ impl TremorAggrFn for Last { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -109,10 +111,10 @@ impl TremorAggrFn for CollectFlattened { } Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::from(self.0.clone())) } @@ -133,6 +135,7 @@ impl TremorAggrFn for CollectFlattened { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } @@ -155,10 +158,10 @@ impl TremorAggrFn for CollectNested { } Ok(()) } - fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { - // TODO: how? - Ok(()) - } + // fn compensate<'event>(&mut self, _args: &[&Value<'event>]) -> FResult<()> { + // // TODO: how? + // Ok(()) + // } fn emit<'event>(&mut self) -> FResult> { Ok(Value::Array(self.0.clone())) } @@ -179,6 +182,7 @@ impl TremorAggrFn for CollectNested { } Ok(()) } + #[cfg(not(tarpaulin_include))] fn boxed_clone(&self) -> Box { Box::new(self.clone()) } From 0407337018d98ac23d7022fe061722be5cd1f9d5 Mon Sep 17 00:00:00 2001 From: "Heinz N. Gies" Date: Mon, 17 May 2021 16:57:05 +0200 Subject: [PATCH 8/8] Improve coverage for raw Signed-off-by: Heinz N. Gies --- tests/script_error.rs | 3 + .../script_errors/bin_invalid_bits/error.txt | 3 + .../bin_invalid_bits/script.tremor | 1 + .../script_errors/bin_invalid_type/error.txt | 3 + .../bin_invalid_type/script.tremor | 1 + .../script_errors/double_const_mod/error.txt | 6 ++ .../double_const_mod/script.tremor | 4 ++ tremor-script/src/ast/raw.rs | 70 ++++++------------- 8 files changed, 43 insertions(+), 48 deletions(-) create mode 100644 tests/script_errors/bin_invalid_bits/error.txt create mode 100644 tests/script_errors/bin_invalid_bits/script.tremor create mode 100644 tests/script_errors/bin_invalid_type/error.txt create mode 100644 tests/script_errors/bin_invalid_type/script.tremor create mode 100644 tests/script_errors/double_const_mod/error.txt create mode 100644 tests/script_errors/double_const_mod/script.tremor diff --git a/tests/script_error.rs b/tests/script_error.rs index 3dd2777deb..cc854d399b 100644 --- a/tests/script_error.rs +++ b/tests/script_error.rs @@ -134,6 +134,9 @@ test_cases!( pp_cyclic, pp_nest_cyclic, // INSERT + double_const_mod, + bin_invalid_bits, + bin_invalid_type, merge_ident, select_ident, function_already_defined, diff --git a/tests/script_errors/bin_invalid_bits/error.txt b/tests/script_errors/bin_invalid_bits/error.txt new file mode 100644 index 0000000000..4ad639c2b3 --- /dev/null +++ b/tests/script_errors/bin_invalid_bits/error.txt @@ -0,0 +1,3 @@ +Error: + 1 | <> + | ^^^^^^^^^ negative bits or bits > 64 are are not allowed: 100 \ No newline at end of file diff --git a/tests/script_errors/bin_invalid_bits/script.tremor b/tests/script_errors/bin_invalid_bits/script.tremor new file mode 100644 index 0000000000..6e3852199c --- /dev/null +++ b/tests/script_errors/bin_invalid_bits/script.tremor @@ -0,0 +1 @@ +<> \ No newline at end of file diff --git a/tests/script_errors/bin_invalid_type/error.txt b/tests/script_errors/bin_invalid_type/error.txt new file mode 100644 index 0000000000..67b5c92f8c --- /dev/null +++ b/tests/script_errors/bin_invalid_type/error.txt @@ -0,0 +1,3 @@ +Error: + 1 | <> + | ^^^^^^^^^^ Not a valid data type: 'snot' \ No newline at end of file diff --git a/tests/script_errors/bin_invalid_type/script.tremor b/tests/script_errors/bin_invalid_type/script.tremor new file mode 100644 index 0000000000..4fa422672d --- /dev/null +++ b/tests/script_errors/bin_invalid_type/script.tremor @@ -0,0 +1 @@ +<> \ No newline at end of file diff --git a/tests/script_errors/double_const_mod/error.txt b/tests/script_errors/double_const_mod/error.txt new file mode 100644 index 0000000000..b27eb0bd93 --- /dev/null +++ b/tests/script_errors/double_const_mod/error.txt @@ -0,0 +1,6 @@ +Error: + 1 | mod test with + 2 | const a = 1; + 3 | const a = 2; + | ^^^^^^^^^^^ Can't declare the constant `a` twice + 4 | end; \ No newline at end of file diff --git a/tests/script_errors/double_const_mod/script.tremor b/tests/script_errors/double_const_mod/script.tremor new file mode 100644 index 0000000000..5d6641530c --- /dev/null +++ b/tests/script_errors/double_const_mod/script.tremor @@ -0,0 +1,4 @@ +mod test with +const a = 1; +const a = 2; +end; diff --git a/tremor-script/src/ast/raw.rs b/tremor-script/src/ast/raw.rs index 71063fb542..e3ae6132e8 100644 --- a/tremor-script/src/ast/raw.rs +++ b/tremor-script/src/ast/raw.rs @@ -225,7 +225,7 @@ impl<'script> Upable<'script> for BytesPartRaw<'script> { return Err(err_generic( &self, &self, - &format!("Not a valid data type: '{}' ({:?})", other.join("-"), other), + &format!("Not a valid data type: '{}'", other.join("-")), &helper.meta, )) } @@ -311,11 +311,8 @@ impl<'script> ModuleRaw<'script> { let expr = expr.up(helper)?; let v = reduce2(expr, &helper)?; helper.consts.insert(name_v, v).map_err(|_old| { - Error::from(ErrorKind::DoubleConst( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - name.to_string(), - )) + let r = Range::from((start, end)); + ErrorKind::DoubleConst(r.expand_lines(2), r, name.to_string()) })?; } ExprRaw::FnDecl(f) => { @@ -332,14 +329,8 @@ impl<'script> ModuleRaw<'script> { helper.register_fun(f)?; } - e => { - return error_generic( - &e, - &e, - &"Can't have expressions inside of modules", - &helper.meta, - ) - } + // ALLOW: the gramer doesn't allow this + _ => unreachable!("Can't have expressions inside of modules"), } } helper.module.pop(); @@ -560,27 +551,9 @@ impl<'script> Upable<'script> for ExprRaw<'script> { #[allow(clippy::clippy::too_many_lines)] fn up<'registry>(self, helper: &mut Helper<'script, 'registry>) -> Result { Ok(match self { - ExprRaw::Module(ModuleRaw { start, end, .. }) => { - // There is no code path that leads here, - // we still rather have an error in case we made - // an error then unreachable - - return Err(ErrorKind::InvalidMod( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - ) - .into()); - } - ExprRaw::Const { start, end, .. } => { - // There is no code path that leads here, - // we still rather have an error in case we made - // an error then unreachable - - return Err(ErrorKind::InvalidConst( - Range::from((start, end)).expand_lines(2), - Range::from((start, end)), - ) - .into()); + ExprRaw::FnDecl(_) | ExprRaw::Const { .. } | ExprRaw::Module(ModuleRaw { .. }) => { + // ALLOW: There is no code path that leads here, + unreachable!() } ExprRaw::MatchExpr(m) => match m.up(helper)? { Match { @@ -674,25 +647,16 @@ impl<'script> Upable<'script> for ExprRaw<'script> { } ExprRaw::Emit(e) => Expr::Emit(Box::new(e.up(helper)?)), ExprRaw::Imut(i) => i.up(helper)?.into(), - ExprRaw::FnDecl(f) => { - // There is no code path that leads here, - // we still rather have an error in case we made - // an error then unreachable - - return Err(ErrorKind::InvalidFn( - f.extent(&helper.meta).expand_lines(2), - f.extent(&helper.meta), - ) - .into()); - } }) } } +#[cfg(not(tarpaulin_include))] // just dispatch impl<'script> BaseExpr for ExprRaw<'script> { fn mid(&self) -> usize { 0 } + fn s(&self, meta: &NodeMetas) -> Location { match self { ExprRaw::Const { start, .. } | ExprRaw::Drop { start, .. } => *start, @@ -705,6 +669,7 @@ impl<'script> BaseExpr for ExprRaw<'script> { ExprRaw::Imut(e) => e.s(meta), } } + fn e(&self, meta: &NodeMetas) -> Location { match self { ExprRaw::Const { end, .. } | ExprRaw::Drop { end, .. } => *end, @@ -809,18 +774,17 @@ impl<'script> Upable<'script> for AnyFnRaw<'script> { } } +#[cfg(not(tarpaulin_include))] // just dispatch impl<'script> BaseExpr for AnyFnRaw<'script> { fn mid(&self) -> usize { 0 } - fn s(&self, _meta: &NodeMetas) -> Location { match self { AnyFnRaw::Match(m) => m.start, AnyFnRaw::Normal(m) => m.start, } } - fn e(&self, _meta: &NodeMetas) -> Location { match self { AnyFnRaw::Match(m) => m.end, @@ -2646,3 +2610,13 @@ pub type ComprehensionCasesRaw<'script, Ex> = Vec = Vec>; pub type ArrayPredicatePatternsRaw<'script> = Vec>; pub type WithExprsRaw<'script> = Vec<(IdentRaw<'script>, ImutExprRaw<'script>)>; + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn default() { + assert_eq!(Endian::default(), Endian::Big); + assert_eq!(BytesDataType::default(), BytesDataType::UnsignedInteger); + } +}