From e46b0337e50897424d05371aa26d5f20e172f749 Mon Sep 17 00:00:00 2001
From: Shivani Bhardwaj <shivani@oisf.net>
Date: Fri, 17 Mar 2023 17:48:35 +0530
Subject: [PATCH] util/base64: skip any invalid char for RFC2045

RFC 2045 states that any invalid character should be skipped over. This
is the RFC that the MIME handler in Suricata follows when dealing with
base64-encoded data.
So far, only spaces were skipped as part of the implementation of this
RFC. Extend it to also skip over any other invalid character, and add a
corresponding test.
---
 src/util-base64.c | 18 +++++++++++-------
 src/util-base64.h |  4 ++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/util-base64.c b/src/util-base64.c
index 543a4511c023..3a5754a07420 100644
--- a/src/util-base64.c
+++ b/src/util-base64.c
@@ -107,9 +107,9 @@ Base64Ecode DecodeBase64(uint8_t *dest, uint32_t dest_size, const uint8_t *src,
         /* Get decimal representation */
         val = GetBase64Value(src[i]);
         if (val < 0) {
-            if ((mode == BASE64_MODE_RFC2045) && (src[i] == ' ')) {
+            if (mode == BASE64_MODE_RFC2045 && src[i] != '=') {
                 if (bbidx == 0) {
-                    /* Special case where last block of data has a leading space */
+                    /* Special case where last block of data has a leading space or invalid char */
                     leading_sp++;
                 }
                 sp++;
@@ -156,7 +156,7 @@ Base64Ecode DecodeBase64(uint8_t *dest, uint32_t dest_size, const uint8_t *src,
         }
     }
 
-    if (!valid && mode == BASE64_MODE_RFC4648) {
+    if (bbidx > 0 && bbidx < 4 && ((!valid && mode == BASE64_MODE_RFC4648))) {
         padding = B64_BLOCK - bbidx;
         *decoded_bytes += ASCII_BLOCK - padding;
         DecodeBase64Block(dptr, b64);
@@ -304,9 +304,12 @@ static int B64TestVectorsRFC2045(void)
     const char *fin_str8 = "foobar";
 
     const char *src9 = "Zm$9vYm.Fy";
-    const char *fin_str9 = "";
+    const char *fin_str9 = "foobar";
 
-    TEST_RFC2045(src1, fin_str1, strlen(fin_str1), strlen(fin_str1), strlen(src1), BASE64_ECODE_OK);
+    const char *src10 = "Y21Wd2IzSjBaVzFoYVd4bWNtRjFaRUJoZEc4dVoyOTJMbUYxOmpqcHh4b3Rhb2w%5d";
+    const char *fin_str10 = "cmVwb3J0ZW1haWxmcmF1ZEBhdG8uZ292LmF1:jjpxxotaol9t";
+
+    TEST_RFC2045(src1, fin_str1, ASCII_BLOCK * 2, strlen(fin_str1), strlen(src1), BASE64_ECODE_OK);
     TEST_RFC2045(src2, fin_str2, ASCII_BLOCK * 2, strlen(fin_str2), strlen(src2), BASE64_ECODE_OK);
     TEST_RFC2045(src3, fin_str3, ASCII_BLOCK * 2, strlen(fin_str3), strlen(src3), BASE64_ECODE_OK);
     TEST_RFC2045(src4, fin_str4, ASCII_BLOCK * 2, strlen(fin_str4), strlen(src4), BASE64_ECODE_OK);
@@ -314,8 +317,9 @@ static int B64TestVectorsRFC2045(void)
     TEST_RFC2045(src6, fin_str6, ASCII_BLOCK * 2, strlen(fin_str6), strlen(src6), BASE64_ECODE_OK);
     TEST_RFC2045(src7, fin_str7, ASCII_BLOCK * 2, strlen(fin_str7), strlen(src7), BASE64_ECODE_OK);
     TEST_RFC2045(src8, fin_str8, ASCII_BLOCK * 2, strlen(fin_str8), strlen(src8), BASE64_ECODE_OK);
-    TEST_RFC2045(src9, fin_str9, ASCII_BLOCK * 2, 0, 0,
-            BASE64_ECODE_ERR); // TODO this should be accepted just like the previous string
+    TEST_RFC2045(src9, fin_str9, ASCII_BLOCK * 2, strlen(fin_str9), strlen(src9), BASE64_ECODE_OK);
+    TEST_RFC2045(src10, fin_str10, strlen(fin_str10) + 3, strlen(fin_str10), strlen(src10),
+            BASE64_ECODE_OK);
     PASS;
 }
 
diff --git a/src/util-base64.h b/src/util-base64.h
index ae07b268a000..66e847a1bdc2 100644
--- a/src/util-base64.h
+++ b/src/util-base64.h
@@ -45,8 +45,8 @@ typedef enum {
      * BASE64("fooba") = "Zm9vYmE="
      * BASE64("foobar") = "Zm9vYmFy"
      * BASE64("foobar") = "Zm 9v Ym Fy"   <-- Notice how the spaces are ignored
-     * BASE64("f") = "Zm$9vYm.Fy"    # TODO according to RFC, All line breaks or *other characters*
-     * not found in base64 alphabet must be ignored by decoding software
+     * BASE64("foobar") = "Zm$9vYm.Fy"    # According to RFC 2045, All line breaks or *other
+     * characters* not found in base64 alphabet must be ignored by decoding software
      * */
     BASE64_MODE_RFC2045, /* SPs are allowed during transfer but must be skipped by Decoder */
     BASE64_MODE_STRICT,