Skip to content

Commit

Permalink
Added initial support for several new commands: regex, rex, rename, sort, table, where. Added some priority markers for commands and functions based on Splunk calling them out as commonly used.
Browse files Browse the repository at this point in the history
  • Loading branch information
scnerd committed Sep 10, 2024
1 parent 34ff568 commit f256cba
Show file tree
Hide file tree
Showing 21 changed files with 674 additions and 249 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
args: [--fix=lf]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.6.3
rev: v0.6.4
hooks:
# Run the linter.
- id: ruff
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spl_transpiler"
version = "0.1.3"
version = "0.1.4"
edition = "2021"

[lib]
Expand All @@ -13,6 +13,7 @@ pyo3-built = "*"
float-derive = "*"
anyhow = "*"
log = "*"
regex-syntax = "*"

inventory = "*"
phf = { version = "*", features = ["macros"] }
Expand Down
308 changes: 154 additions & 154 deletions README.md

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions src/commands/cmd_dedup/spl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ impl SplCommand<DedupCommand> for DedupParser {
opt(map(alt((tag("+"), tag("-"))), Into::into)),
map(field, Into::into),
))),
|fields_to_sort| SortCommand { fields_to_sort },
|fields_to_sort| SortCommand::new_simple(fields_to_sort),
),
)),
)),
Expand All @@ -123,9 +123,10 @@ impl SplCommand<DedupCommand> for DedupParser {
keep_events: options.keep_events,
keep_empty: options.keep_empty,
consecutive: options.consecutive,
sort_by: sort_by.unwrap_or(SortCommand {
fields_to_sort: vec![(Some("+".into()), ast::Field::from("_no").into())],
}),
sort_by: sort_by.unwrap_or(SortCommand::new_simple(vec![(
Some("+".into()),
ast::Field::from("_no").into(),
)])),
},
)(input)
}
Expand Down
25 changes: 22 additions & 3 deletions src/commands/cmd_regex/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
use crate::ast::ast;
use crate::commands::cmd_regex::spl::RegexCommand;
use crate::pyspark::ast::*;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};
use anyhow::bail;

impl PipelineTransformer for RegexCommand {
/// Translates the SPL `regex` command into a PySpark `where(regexp_like(...))` filter.
///
/// Returns the transformed pipeline state, or an error for an unknown
/// comparison operator.
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
    let mut df = state.df;

    // `regex [<field> (=|!=)] "pattern"`: the field defaults to `_raw`,
    // and `!=` keeps only rows that do NOT match.
    let (field, invert) = match self.item.clone() {
        None => ("_raw".to_string(), false),
        Some((ast::Field(field), comparison)) => match comparison.as_str() {
            "=" => (field, false),
            "!=" => (field, true),
            _ => bail!("Invalid regex comparison: {}", comparison),
        },
    };

    // Splice the pattern into a Python raw-string literal.
    // NOTE(review): a pattern containing `"` would break the generated
    // literal — TODO confirm escaping is handled upstream.
    let mut col = column_like!(regexp_like(
        [col(field)],
        [Expr::Raw(Raw(format!("r\"{}\"", self.regex)))]
    ));
    if invert {
        col = column_like!(~[col]);
    }

    df = df.where_(col);

    Ok(PipelineTransformState { df })
}
Expand Down
18 changes: 15 additions & 3 deletions src/commands/cmd_rename/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
use crate::ast::ast;
use crate::commands::cmd_rename::spl::RenameCommand;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};
use anyhow::bail;

impl PipelineTransformer for RenameCommand {
/// Translates the SPL `rename` command into chained PySpark
/// `withColumnRenamed` calls, one per alias.
///
/// Errors on wildcard sources (not yet supported) and on any alias whose
/// source expression is not a plain field.
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
    let mut df = state.df;

    for alias in self.alias.clone() {
        // Only `field AS name` is supported; wildcards are recognized but
        // explicitly rejected for now.
        let old_name = match *alias.expr {
            ast::Expr::Leaf(ast::LeafExpr::Constant(ast::Constant::Field(ast::Field(
                name,
            )))) => name,
            ast::Expr::Leaf(ast::LeafExpr::Constant(ast::Constant::Wildcard(
                ast::Wildcard(_name),
            ))) => bail!("UNIMPLEMENTED: Wildcard renaming is not supported yet"),
            _ => bail!("Unsupported rename source: {:?}", alias),
        };
        df = df.with_column_renamed(old_name, alias.name.clone());
    }

    Ok(PipelineTransformState { df })
}
Expand Down
28 changes: 25 additions & 3 deletions src/commands/cmd_rex/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
use crate::commands::cmd_rex::spl::RexCommand;
use crate::commands::regex_utils::get_groups;
use crate::pyspark::ast::*;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};
use anyhow::ensure;

impl PipelineTransformer for RexCommand {
/// Translates the SPL `rex` command (extraction mode only) into one PySpark
/// `withColumn(regexp_extract(...))` call per capture group.
///
/// Errors on sed mode and on `max_match != 1`, neither of which is
/// implemented yet.
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
    let mut df = state.df;

    ensure!(
        self.mode != Some("sed".into()),
        "sed-mode rex commands are not yet supported."
    );
    ensure!(
        self.max_match == 1,
        "Rex not yet implemented for multiple matches"
    );

    // One output column per capture group; unnamed groups fall back to
    // their numeric index as the column name.
    let regex_groups = get_groups(self.regex.clone())?;

    for (group_index, group_name) in regex_groups {
        df = df.with_column(
            group_name.unwrap_or(group_index.to_string()),
            // NOTE(review): the pattern is spliced into a Python raw-string
            // literal; a pattern containing `"` would break the generated
            // code — TODO confirm escaping is handled upstream.
            column_like!(regexp_extract(
                [col(self.field.clone())],
                [Expr::Raw(Raw(format!("r\"{}\"", self.regex)))],
                [py_lit(group_index as i64)]
            )),
        );
    }

    Ok(PipelineTransformState { df })
}
Expand Down
17 changes: 12 additions & 5 deletions src/commands/cmd_rex/spl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::ast::ast::ParsedCommandOptions;
use crate::ast::python::impl_pyclass;
use crate::commands::spl::{SplCommand, SplCommandOptions};
use crate::spl::double_quoted;
use anyhow::ensure;
use nom::combinator::map;
use nom::sequence::pair;
use nom::IResult;
Expand All @@ -22,7 +23,7 @@ use pyo3::prelude::*;
#[pyclass(frozen, eq, hash)]
pub struct RexCommand {
#[pyo3(get)]
pub field: Option<String>,
pub field: String,
#[pyo3(get)]
pub max_match: i64,
#[pyo3(get)]
Expand All @@ -33,7 +34,7 @@ pub struct RexCommand {
pub regex: String,
}
impl_pyclass!(RexCommand {
field: Option<String>,
field: String,
max_match: i64,
offset_field: Option<String>,
mode: Option<String>,
Expand All @@ -43,7 +44,7 @@ impl_pyclass!(RexCommand {
#[derive(Debug, Default)]
pub struct RexParser {}
pub struct RexCommandOptions {
field: Option<String>,
field: String,
max_match: i64,
offset_field: Option<String>,
mode: Option<String>,
Expand All @@ -55,11 +56,17 @@ impl TryFrom<ParsedCommandOptions> for RexCommandOptions {
type Error = anyhow::Error;

/// Extracts `rex` command options from the generic parsed-options bag,
/// applying Splunk's documented defaults.
///
/// Errors if `mode` is present but not `sed` (the only mode `rex` defines).
fn try_from(value: ParsedCommandOptions) -> Result<Self, Self::Error> {
    // `mode` is matched case-insensitively.
    let mode = value
        .get_string_option("mode")?
        .map(|s| s.to_ascii_lowercase());
    if let Some(mode_str) = mode.clone() {
        ensure!(mode_str == "sed", "Invalid rex mode: {}", mode_str);
    };
    Ok(Self {
        // `field` defaults to `_raw`, matching Splunk's default for `rex`.
        field: value.get_string("field", "_raw")?,
        max_match: value.get_int("max_match", 1)?,
        offset_field: value.get_string_option("offset_field")?,
        mode,
    })
}
}
Expand Down
60 changes: 57 additions & 3 deletions src/commands/cmd_sort/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,66 @@
use crate::ast::ast;
use crate::commands::cmd_sort::spl::SortCommand;
use crate::pyspark::ast::*;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};
use anyhow::{bail, Result};

/// Converts a generic column-like expression into an explicitly ascending or
/// descending ordering. An explicit `-` sign flips the global `descending`
/// flag; `+` or no sign leaves it as-is.
fn _ascending_or_descending(
    c: ColumnLike,
    sign: Option<String>,
    descending: bool,
) -> Result<ColumnLike> {
    // Does the per-field sign invert the global direction?
    let flip = match sign.as_deref() {
        None | Some("+") => false,
        Some("-") => true,
        Some(sign) => bail!("Invalid sort sign `{}`", sign),
    };
    // Effective direction is the XOR of the global flag and the sign flip.
    if descending != flip {
        Ok(column_like!([c].desc()))
    } else {
        Ok(column_like!([c].asc()))
    }
}

/// Resolves a sort expression to a column reference we know how to handle.
///
/// Errors (rather than panicking) on anything unsupported, matching this
/// crate's `bail!("UNIMPLEMENTED: ...")` convention for recognized-but-
/// untranslated constructs.
fn _resolve_expr(e: ast::Expr) -> Result<ColumnLike> {
    match e {
        // Bare field name -> plain column reference.
        ast::Expr::Leaf(ast::LeafExpr::Constant(ast::Constant::Field(ast::Field(name)))) => {
            Ok(column_like!(col(name)))
        }
        // Splunk sort-type casts are recognized but not yet translated;
        // report an error instead of panicking in library code.
        ast::Expr::Call(ast::Call { name, .. }) => match name.as_str() {
            "auto" | "ip" | "num" | "str" => {
                bail!("UNIMPLEMENTED: Sort function `{}` is not supported yet", name)
            }
            _ => bail!("Unsupported sort function: {}", name),
        },
        _ => bail!("Unsupported sort expression: {:?}", e),
    }
}

impl PipelineTransformer for SortCommand {
/// Translates the SPL `sort` command into a PySpark `orderBy`, plus a
/// `limit` when a nonzero result count was requested.
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
    let mut df = state.df;

    // Resolve each (sign, expr) pair into an ordered column; the first
    // failure aborts the whole command.
    let sort_fields: Result<Vec<_>> = self
        .fields_to_sort
        .iter()
        .cloned()
        .map(|(sign, expr)| {
            let col = _resolve_expr(expr)?;
            Ok(_ascending_or_descending(col, sign, self.descending)?)
        })
        .collect();

    df = df.order_by(sort_fields?);

    // A count of 0 means "unlimited" (see `SortCommand::new_simple`).
    // NOTE(review): `as u64` would wrap for a negative count — presumably
    // the parser never produces one; confirm.
    if self.count != 0 {
        df = df.limit(self.count as u64);
    }

    Ok(PipelineTransformState { df })
}
Expand Down
49 changes: 40 additions & 9 deletions src/commands/cmd_sort/spl.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use crate::ast::ast::{Expr, ParsedCommandOptions};
use crate::ast::python::impl_pyclass;
use crate::commands::spl::{SplCommand, SplCommandOptions};
use crate::spl::{expr, ws};
use crate::spl::{expr, int, ws};
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::{tag, tag_no_case};
use nom::combinator::{map, opt};
use nom::multi::separated_list1;
use nom::sequence::pair;
use nom::sequence::{pair, preceded, tuple};
use nom::IResult;
use pyo3::prelude::*;
//
Expand All @@ -18,8 +18,31 @@ use pyo3::prelude::*;
/// AST node for the SPL `sort` command: a list of signed sort keys, a result
/// limit, and a global descending flag.
pub struct SortCommand {
    /// Sort keys: an optional `+`/`-` sign (ascending/descending) paired
    /// with the expression to sort by.
    #[pyo3(get)]
    pub fields_to_sort: Vec<(Option<String>, Expr)>,
    /// Maximum number of results to keep; 0 means unlimited.
    #[pyo3(get)]
    pub count: i64,
    /// When true, flips the direction of every sort key.
    #[pyo3(get)]
    pub descending: bool,
}
impl_pyclass!(SortCommand { fields_to_sort: Vec<(Option<String>, Expr)>, count: i64, descending: bool });
impl Default for SortCommand {
    /// `sort` with no arguments: no keys, Splunk's default 10,000-row
    /// limit, ascending order.
    fn default() -> Self {
        SortCommand {
            fields_to_sort: Vec::new(),
            count: 10000,
            descending: false,
        }
    }
}
impl SortCommand {
    /// Simple constructor for creating an unlimited sort command. Note that `.default()` uses the `sort` command's default limit of 10,000.
    pub fn new_simple(fields_to_sort: Vec<(Option<String>, Expr)>) -> Self {
        SortCommand {
            fields_to_sort,
            // 0 disables the limit (see the transform's `count != 0` check).
            count: 0,
            descending: false,
        }
    }
}

#[derive(Debug, Default)]
pub struct SortParser {}
Expand All @@ -41,11 +64,19 @@ impl SplCommand<SortCommand> for SortParser {

/// Parses the body of a `sort` command:
/// `sort [<limit>] (+|-)?<expr> (, (+|-)?<expr>)* [desc]`.
fn parse_body(input: &str) -> IResult<&str, SortCommand> {
    map(
        tuple((
            // Optional leading result limit, with or without a `limit=`
            // prefix (both case-insensitive forms Splunk accepts).
            opt(preceded(opt(ws(tag_no_case("limit="))), ws(int))),
            // Comma-separated sort keys, each optionally signed `+`/`-`.
            separated_list1(
                ws(tag(",")),
                pair(opt(map(alt((tag("+"), tag("-"))), String::from)), expr),
            ),
            // Optional trailing `desc` keyword flips the whole ordering.
            opt(ws(tag_no_case("desc"))),
        )),
        |(count, fields_to_sort, desc)| SortCommand {
            fields_to_sort,
            // Splunk's default limit for `sort` is 10,000 rows.
            count: count.map(|v| v.0).unwrap_or(10000),
            descending: desc.is_some(),
        },
    )(input)
}
}
11 changes: 8 additions & 3 deletions src/commands/cmd_table/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
use crate::commands::cmd_table::spl::TableCommand;
use crate::pyspark::ast::*;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};

impl PipelineTransformer for TableCommand {
/// Translates the SPL `table` command into a PySpark `select` that projects
/// the dataframe down to the listed columns, in order.
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
    let mut df = state.df;

    df = df.select(
        self.fields
            .iter()
            .map(|field| column_like!(col(field.0.clone())))
            .collect(),
    );

    Ok(PipelineTransformState { df })
}
Expand Down
3 changes: 2 additions & 1 deletion src/commands/cmd_top/pyspark.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use super::spl::*;
use crate::pyspark::ast::*;
use crate::pyspark::transpiler::{PipelineTransformState, PipelineTransformer};
use anyhow::bail;

impl PipelineTransformer for TopCommand {
fn transform(&self, state: PipelineTransformState) -> anyhow::Result<PipelineTransformState> {
let mut df = state.df;

if self.use_other {
unimplemented!("No implementation yet for `useother=true` in `top`")
bail!("UNIMPLEMENTED: No implementation yet for `useother=true` in `top`")
}

let groupby_fields: Vec<_> = self
Expand Down
Loading

0 comments on commit f256cba

Please sign in to comment.