Skip to content

Commit

Permalink
Keys can be naked
Browse files Browse the repository at this point in the history
This update makes keys appear as first-class citizens to the
query: they no longer have to be wrapped in double quotes (""). It also
adds backticks (``) as a key escape mechanism, for forcing the parser
to switch into parsing a key.
  • Loading branch information
d1ngd0 committed Sep 11, 2024
1 parent c75a920 commit 3ceb198
Show file tree
Hide file tree
Showing 16 changed files with 259 additions and 149 deletions.
73 changes: 69 additions & 4 deletions src/path/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@ const TOKEN_REGEX: char = '/';
const TOKEN_ESCAPE: char = '\\';
const TOKEN_STRING_WRAP: char = '"';

// These tokens are not actually used by the path parser, but
// they should end lexing since they are special tokens higher up.
const TOKEN_KEY_WRAP: char = '`';
const TOKEN_EQUAL: char = '=';
// const TOKEN_COMMA: char = ','; first_sep
const TOKEN_GREATER: char = '>';
const TOKEN_LESS: char = '<';
const TOKEN_COLON: char = ':';
const TOKEN_PLUS: char = '+';
const TOKEN_MINUS: char = '-';
// const TOKEN_DIVIDE: char = '/'; regex
// const TOKEN_MULTIPLY: char = '*'; wildcard
const TOKEN_MODULUS: char = '%';

pub struct Lexer<'a> {
path: &'a str,
head: usize,
Expand Down Expand Up @@ -47,6 +61,12 @@ impl<'a> Lexer<'a> {
Some(tok)
}

// chars_consumed returns how much of the input the lexer has consumed
// so far. NOTE(review): `head` is advanced by `char.len_utf8()`, so this
// is a byte offset into `path`, not a character count — confirm callers
// treat it as a byte position.
pub fn chars_consumed(&self) -> usize {
self.head
}

// peak returns the next token without moving the head forward
pub fn peak(&mut self) -> Option<&'a str> {
// keep track of "escape_token" state before the call so we can
Expand Down Expand Up @@ -97,11 +117,19 @@ impl<'a> Lexer<'a> {
// important, for every path out of this loop you MUST consider what to
// do with next_index given your context.
'charloop: for char in c {
// even though we don't care about whitespace in the path, higher
// level lexers do, so we need to respect their need for whitespace
// to terminate lexing of a token.
if !escape_all && char.is_whitespace() {
break 'charloop;
}
// Any special characters should be listed here. They can all be escaped
match char {
TOKEN_DOT | TOKEN_WILDCARD | TOKEN_RECURSIVE | TOKEN_ARRAY_OPEN
| TOKEN_ARRAY_CLOSE | TOKEN_FIRST_OPEN | TOKEN_FIRST_CLOSE | TOKEN_MULTI_OPEN
| TOKEN_MULTI_CLOSE | TOKEN_FIRST_SEP | TOKEN_MULTI_SEP => {
| TOKEN_MULTI_CLOSE | TOKEN_FIRST_SEP | TOKEN_MULTI_SEP | TOKEN_KEY_WRAP
| TOKEN_EQUAL | TOKEN_GREATER | TOKEN_LESS | TOKEN_COLON | TOKEN_PLUS
| TOKEN_MINUS | TOKEN_MODULUS => {
// if the previous token was an escape token, we just want to add this to the
// existing token
if escape_next || escape_all {
Expand Down Expand Up @@ -201,6 +229,26 @@ impl<'a> Lexer<'a> {
None
}
}

// consume_whitespace advances the head past any leading whitespace in
// the unconsumed input.
pub fn consume_whitespace(&mut self) {
    // When an escape token is active we are parsing some kind of string,
    // so leading whitespace is significant (e.g. ' hello' or ' ') and
    // must be preserved rather than consumed.
    if self.escape_token.is_some() {
        return;
    }

    // Measure the whitespace prefix of the remaining input and skip it
    // in one step; trim_start() uses the same char::is_whitespace test
    // the original per-character loop relied on.
    let rest = &self.path[self.head..];
    self.head += rest.len() - rest.trim_start().len();
}
}

#[cfg(test)]
Expand All @@ -225,8 +273,16 @@ mod test {

#[test]
fn test_lexor() {
test_lexor!("Im.am a.fish", "Im", ".", "am a", ".", "fish");
test_lexor!(" ", " ");
test_lexor!(
"Im.\"am a\".fish",
"Im",
".",
"\"",
"am a",
"\"",
".",
"fish"
);
test_lexor!(
"labels.{hostname|host}",
"labels",
Expand Down Expand Up @@ -270,6 +326,15 @@ mod test {
test_lexor!("/.*/.something", "/", ".*", "/", ".", "something");
test_lexor!("/asd\"asdf/", "/", "asd\"asdf", "/");
test_lexor!(r#"\"a.b.c\""#, "\\\"a", ".", "b", ".", "c\\\"");
test_lexor!(r#"Im.am a.fish"#, "Im", ".", "am a", ".", "fish");
test_lexor!(
r#"Im."am a".fish"#,
"Im",
".",
"\"",
"am a",
"\"",
".",
"fish"
);
}
}
28 changes: 27 additions & 1 deletion src/path/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ impl Node {
// new_field_literal builds a FieldLiteral node from an already-escaped
// field string. NOTE(review): this currently always returns Ok —
// presumably kept fallible (ParseResult) for future validation; confirm.
pub fn new_field_literal(field: &str) -> ParseResult<Node> {
Ok(Node::FieldLiteral(FieldLiteral::from_escaped(field)))
}

// is_settable reports whether this node can serve as the target of a
// set operation. Only a concrete FieldLiteral names a single location;
// every other node kind is not settable.
pub fn is_settable(&self) -> bool {
    // `matches!` replaces the verbose match-with-catch-all form
    // (clippy::match_like_matches_macro).
    matches!(self, Node::FieldLiteral(_))
}
}

impl fmt::Display for Node {
Expand Down Expand Up @@ -186,6 +193,17 @@ impl Path {
// append_key pushes a new FieldLiteral node for `key` onto the end of
// the path. NOTE(review): the key goes through FieldLiteral::new rather
// than from_escaped — presumably it is taken verbatim and escaped as
// needed; confirm against FieldLiteral's constructors.
pub fn append_key(&mut self, key: &str) {
self.0.push(Node::FieldLiteral(FieldLiteral::new(key)));
}

// is_settable tells us if the path defined can be set or not.
pub fn is_settable(&self) -> bool {
for n in self.0.iter() {
if !n.is_settable() {
return false;
}
}

return true;
}
}

impl Default for Path {
Expand Down Expand Up @@ -354,6 +372,9 @@ impl Parser<'_> {
// function will return true, if you should stop because EOF was returned
// from the parser it will return false
pub fn parse(&mut self) -> ParseResult<Path> {
// consume any whitespace in the beginning.
self.lex.consume_whitespace();

let mut nodes = vec![];
loop {
match self.parse_operator() {
Expand Down Expand Up @@ -440,6 +461,11 @@ impl Parser<'_> {
// consume advances the lexer by one token, discarding its value. Used
// when the caller has already peeked the token and only needs to move
// the lexer forward.
fn consume(&mut self) {
let _ = self.lex.token();
}

// chars_consumed returns the number of characters the parser consumed,
// delegating to the underlying lexer. NOTE(review): the lexer tracks a
// byte offset (head is advanced by len_utf8), so this is presumably a
// byte count — confirm before using it for character-based positions.
pub fn chars_consumed(&self) -> usize {
self.lex.chars_consumed()
}
}

#[cfg(test)]
Expand All @@ -466,7 +492,7 @@ mod tests {

#[test]
fn test_parse() -> ParseResult<()> {
test_parse!("Im.am a.fish", "Im", "\"am a\"", "fish");
test_parse!("Im.\"am a\".fish", "Im", "\"am a\"", "fish");
test_parse!("a.b.c.d.e", "a", "b", "c", "d", "e");
test_parse!("a.\"b.a\".c", "a", "\"b.a\"", "c");
test_parse!("a.b\\.a.c", "a", "\"b.a\"", "c");
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/avg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ pub struct AvgAggregation {

impl AvgAggregation {
pub fn from_parser(parser: &mut Parser) -> QueryResult<AvgAggregation> {
parser.consume_token(AGGREGATION_AVG)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_AVG)?;
parser.consume_next(FN_OPEN)?;
let expr = parser.parse_expression()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(AvgAggregation {
expr,
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ impl CountAggregation {
}

pub fn from_parser(parser: &mut Parser) -> QueryResult<CountAggregation> {
parser.consume_token(AGGREGATION_COUNT)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_COUNT)?;
parser.consume_next(FN_OPEN)?;

let expr = match parser.peak() {
Some(FN_CLOSE) => None,
_ => Some(parser.parse_expression()?),
};

parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(CountAggregation { expr, count: 0 })
}
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/cumulative_sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ impl CumulativeSum {
}

pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token(AGGREGATION_CUMULATIVE_SUM)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_CUMULATIVE_SUM)?;
parser.consume_next(FN_OPEN)?;
let agg = parser.parse_aggregation()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(CumulativeSum { sum: None, agg })
}
Expand Down
8 changes: 4 additions & 4 deletions src/query/aggregation/math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ macro_rules! math_aggregation {
}

pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token($fn)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next($fn)?;
parser.consume_next(FN_OPEN)?;
let left = parser.parse_aggregation()?;
parser.consume_token(FN_SEP)?;
parser.consume_next(FN_SEP)?;
let right = parser.parse_aggregation()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok($name { left, right })
}
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ pub struct MaxAggregation {

impl MaxAggregation {
pub fn from_parser(parser: &mut Parser) -> QueryResult<MaxAggregation> {
parser.consume_token(AGGREGATION_MAX)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_MAX)?;
parser.consume_next(FN_OPEN)?;
let value = parser.parse_expression()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(MaxAggregation { value, max: None })
}
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/min.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ pub struct MinAggregation {

impl MinAggregation {
pub fn from_parser(parser: &mut Parser) -> QueryResult<MinAggregation> {
parser.consume_token(AGGREGATION_MIN)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_MIN)?;
parser.consume_next(FN_OPEN)?;
let value = parser.parse_expression()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(MinAggregation { value, min: None })
}
Expand Down
6 changes: 3 additions & 3 deletions src/query/aggregation/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ pub struct SumAggregation {

impl SumAggregation {
pub fn from_parser(parser: &mut Parser) -> QueryResult<SumAggregation> {
parser.consume_token(AGGREGATION_SUM)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next(AGGREGATION_SUM)?;
parser.consume_next(FN_OPEN)?;
let value = parser.parse_expression()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok(SumAggregation { value, sum: None })
}
Expand Down
22 changes: 13 additions & 9 deletions src/query/expression/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::{collections::HashMap, fmt::Display, ops::Deref};
use crate::{
query::{
parser::{
Parser, ARRAY_CHILD_SEP, ARRAY_WRAP, ARRAY_WRAP_END, FALSE, KEY_WRAP, MAP_CHILD_SEP,
MAP_CHILD_SET, MAP_WRAP, MAP_WRAP_END, NULL, STRING_WRAP, TRUE,
Parser, ARRAY_CHILD_SEP, ARRAY_WRAP, ARRAY_WRAP_END, FALSE, IDENTIFIER_WRAP,
MAP_CHILD_SEP, MAP_CHILD_SET, MAP_WRAP, MAP_WRAP_END, NULL, STRING_WRAP, TRUE,
},
Error, QueryResult,
},
Expand All @@ -21,7 +21,7 @@ pub struct StringExpression {

impl StringExpression {
pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token(STRING_WRAP)?;
parser.consume_next(STRING_WRAP)?;
let value = match parser.token() {
Some(tok) if tok == STRING_WRAP => {
return Ok(StringExpression {
Expand Down Expand Up @@ -63,7 +63,7 @@ pub struct NullExpression;

impl NullExpression {
pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token(NULL)?;
parser.consume_next(NULL)?;
Ok(NullExpression)
}
}
Expand Down Expand Up @@ -180,12 +180,12 @@ pub struct MapLiteral(HashMap<String, Box<dyn Expression>>);

impl MapLiteral {
pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token(MAP_WRAP)?;
parser.consume_next(MAP_WRAP)?;
let mut map = HashMap::new();
loop {
// parse 'key': <expression>
let key = parser.parse_string(KEY_WRAP)?;
parser.consume_token(MAP_CHILD_SET)?;
let key = parser.parse_string(IDENTIFIER_WRAP)?;
parser.consume_next(MAP_CHILD_SET)?;
let value = parser.parse_expression()?;

map.insert(key, value);
Expand Down Expand Up @@ -232,7 +232,11 @@ impl Display for MapLiteral {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{{")?;
for (k, v) in self.iter() {
write!(f, "'{}': {}, ", k, v)?;
write!(
f,
"{}{}{}{} {}, ",
IDENTIFIER_WRAP, k, IDENTIFIER_WRAP, MAP_CHILD_SET, v
)?;
}
write!(f, "}}")
}
Expand All @@ -243,7 +247,7 @@ pub struct ArrayLiteral(Vec<Box<dyn Expression>>);

impl ArrayLiteral {
pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token(ARRAY_WRAP)?;
parser.consume_next(ARRAY_WRAP)?;
let mut arr = Vec::new();
loop {
let value = parser.parse_expression()?;
Expand Down
8 changes: 4 additions & 4 deletions src/query/expression/math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ macro_rules! impl_math_op {

impl $name {
pub fn from_parser(parser: &mut Parser) -> QueryResult<Self> {
parser.consume_token($fn)?;
parser.consume_token(FN_OPEN)?;
parser.consume_next($fn)?;
parser.consume_next(FN_OPEN)?;
let left = parser.parse_expression()?;
parser.consume_token(FN_SEP)?;
parser.consume_next(FN_SEP)?;
let right = parser.parse_expression()?;
parser.consume_token(FN_CLOSE)?;
parser.consume_next(FN_CLOSE)?;

Ok($name { left, right })
}
Expand Down
Loading

0 comments on commit 3ceb198

Please sign in to comment.