OISF · jlucovsky · Feb 22, 2024 · Feb 22, 2024 · Feb 22, 2024 · Feb 23, 2024
@@ -10,6 +10,8 @@ base64_decode
 
 Decodes base64 data from a buffer and makes it available for the base64_data function.
 
+We recommend using the ``from_base64`` transform instead -- see :ref:`from_base64 <from_base64>`.
+
 Syntax::
 
     base64_decode:bytes <value>, offset <value>, relative;

@@ -96,7 +96,7 @@ to_uppercase
 
 Converts the buffer to uppercase and passes the value on.
 
-This example alerts if ``http.uri`` contains ``THIS TEXT HAS BEEN CONVERTED TO LOWERCASE``
+This example alerts if ``http.uri`` contains ``THIS TEXT HAS BEEN CONVERTED TO UPPERCASE``
 
 Example::
 
@@ -188,3 +188,58 @@ Example::
 
     alert http any any -> any any (msg:"HTTP ua only"; http.header_names; \
        bsize:16; content:"|0d 0a|User-Agent|0d 0a 0d 0a|"; nocase; sid:1;)
+
+.. _from_base64:
+
+from_base64
+-----------
+
+This transform is similar to the keyword ``base64_decode``: the buffer is decoded using
+the optional values for ``mode``, ``offset`` and ``bytes`` and is available for matching
+on the decoded data.
+
+After this transform completes, the buffer will contain only bytes that could be base64-decoded.
+If the decoding process encountered invalid bytes, those will not be included in the buffer.
+
+The option values must be ``,`` separated and can appear in any order.
+
+.. note:: ``from_base64`` follows RFC 4648 by default, i.e., encountering any character
+   that is not in the base64 alphabet causes that character and the rest of the string
+   to be rejected.
+
+Format::
+
+    from_base64: [[bytes <value>] [, offset <offset_value> [, mode strict|rfc4648|rfc2045]]]
+
+There are defaults for each of the options:
+
+- ``bytes`` defaults to the length of the input buffer
+- ``offset`` defaults to ``0`` and must be less than ``65536``
+- ``mode`` defaults to ``rfc4648``
+
+Note that ``bytes`` and ``offset`` do not yet accept variables from ``byte_extract`` or ``byte_math``;
+support for such variables may be added in a later version of Suricata.
+
+Mode ``rfc4648`` applies RFC 4648 decoding logic which is suitable for encoding binary
+data that can be safely sent by email, used in a URL, or included with HTTP POST requests.
+
+Mode ``rfc2045`` applies RFC 2045 decoding logic, which supports strings that contain embedded spaces,
+line breaks, and any other characters outside the base64 alphabet.
+
+Mode ``strict`` will fail if an invalid character is found in the encoded bytes.
+
+The following examples will alert when the buffer contents match (see the
+last ``content`` value for the expected strings).
+
+This example uses the defaults and transforms ``"VGhpcyBpcyBTdXJpY2F0YQ=="`` to ``"This is Suricata"``::
+
+       content: "VGhpcyBpcyBTdXJpY2F0YQ=="; from_base64; content:"This is Suricata";
+
+This example transforms ``"dGhpc2lzYXRlc3QK"`` to ``"thisisatest"``::
+
+       content:"/?arg=dGhpc2lzYXRlc3QK"; from_base64: offset 6, mode rfc4648; \
+       content:"thisisatest";
+
+This example transforms ``"Zm 9v Ym Fy"`` to ``"foobar"``::
+
+       content:"/?arg=Zm 9v Ym Fy"; from_base64: offset 6, mode rfc2045; \
+       content:"foobar";
@@ -60,6 +60,8 @@ Major changes
 - It is possible to see an increase of alerts, for the same rule-sets, if you
   use many stream/payload rules, due to Suricata triggering TCP stream
   reassembly earlier.
+- New transform ``from_base64`` that base64-decodes a buffer and passes on the
+  decoded buffer. It's recommended that ``from_base64`` be used instead of ``base64_decode``.
 
 Upgrading 6.0 to 7.0
 --------------------

@@ -18,7 +18,7 @@
 // Author: Jeff Lucovsky <jlucovsky@oisf.net>
 
 use crate::detect::error::RuleParseError;
-use crate::detect::parser::{parse_token, take_until_whitespace};
+use crate::detect::parser::{parse_var, take_until_whitespace, ResultValue};
 use std::ffi::{CStr, CString};
 use std::os::raw::c_char;
 
@@ -88,12 +88,6 @@ pub const DETECT_BYTEMATH_FIXED_PARAM_COUNT: usize = 5;
 // Optional parameters: endian, relative, string, dce, bitmask
 pub const DETECT_BYTEMATH_MAX_PARAM_COUNT: usize = 10;
 
-#[derive(Debug)]
-enum ResultValue {
-    Numeric(u64),
-    String(String),
-}
-
 #[repr(C)]
 #[derive(Debug)]
 pub struct DetectByteMathData {
@@ -194,17 +188,6 @@ fn get_endian_value(value: &str) -> Result<ByteMathEndian, ()> {
     Ok(res)
 }
 
-// Parsed as a u64 for validation with u32 {min,max} so values greater than uint32
-// are not treated as a string value.
-fn parse_var(input: &str) -> IResult<&str, ResultValue, RuleParseError<&str>> {
-    let (input, value) = parse_token(input)?;
-    if let Ok(val) = value.parse::<u64>() {
-        Ok((input, ResultValue::Numeric(val)))
-    } else {
-        Ok((input, ResultValue::String(value.to_string())))
-    }
-}
-
 fn parse_bytemath(input: &str) -> IResult<&str, DetectByteMathData, RuleParseError<&str>> {
     // Inner utility function for easy error creation.
     fn make_error(reason: String) -> nom7::Err<RuleParseError<&'static str>> {

@@ -20,9 +20,12 @@ use nom7::error::{ErrorKind, ParseError};
 /// Custom rule parse errors.
 ///
 /// Implemented based on the Nom example for implementing custom errors.
+/// The string is an error message provided by the parsing logic, e.g.,
+///      Incorrect usage because of "x", "y" and "z"
 #[derive(Debug, PartialEq, Eq)]
 pub enum RuleParseError<I> {
     InvalidByteMath(String),
+    InvalidTransformBase64(String),
 
     Nom(I, ErrorKind),
 }

@@ -22,6 +22,7 @@ pub mod error;
 pub mod iprep;
 pub mod parser;
 pub mod stream_size;
+pub mod transform_base64;
 pub mod uint;
 pub mod uri;
 pub mod requires;

@@ -22,12 +22,27 @@ use nom7::character::complete::multispace0;
 use nom7::sequence::preceded;
 use nom7::IResult;
 
+#[derive(Debug)]
+pub enum ResultValue {
+    Numeric(u64),
+    String(String),
+}
+
 static WHITESPACE: &str = " \t\r\n";
 /// Parse all characters up until the next whitespace character.
 pub fn take_until_whitespace(input: &str) -> IResult<&str, &str, RuleParseError<&str>> {
     nom7::bytes::complete::is_not(WHITESPACE)(input)
 }
 
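+/// Parse the next token as a variable: a `Numeric` value if it parses as a u64, otherwise a `String`.
+/// Parsed as a u64 so the value can be validated against a u32 min/max if needed.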
+pub fn parse_var(input: &str) -> IResult<&str, ResultValue, RuleParseError<&str>> {
+    let (input, value) = parse_token(input)?;
+    if let Ok(val) = value.parse::<u64>() {
+        Ok((input, ResultValue::Numeric(val)))
+    } else {
+        Ok((input, ResultValue::String(value.to_string())))
+    }
+}
 /// Parse the next token ignoring leading whitespace.
 ///
 /// A token is the next sequence of chars until a terminating character. Leading whitespace