From f9e7f46902f074de5812a4d57a110ebe11452379 Mon Sep 17 00:00:00 2001 From: Seth Grover Date: Wed, 5 Jun 2024 12:43:10 -0600 Subject: [PATCH] for idaholab/Malcolm#465, work in progress handling uploading evtx files --- docs/upload.md | 3 +- filebeat/filebeat-logs.yml | 18 +++ filebeat/scripts/clean-processed-folder.py | 2 +- .../scripts/filebeat-process-zeek-folder.sh | 127 +++++++++++------- .../filebeat-watch-zeeklogs-uploads-folder.py | 15 ++- .../share/zeek/site/extractor_params.zeek | 1 + .../zeek/extractor_override.interesting.zeek | 1 + .../pipelines/enrichment/23_severity.conf | 1 + logstash/pipelines/output/98_finalize.conf | 1 + .../scripts/watch-pcap-uploads-folder.py | 2 +- shared/bin/evtx_to_jsonl.sh | 65 +++++++-- .../extractor_override.interesting.zeek | 1 + zeek/config/extractor_params.zeek | 1 + 13 files changed, 173 insertions(+), 65 deletions(-) diff --git a/docs/upload.md b/docs/upload.md index 4210402e7..44e76e52a 100644 --- a/docs/upload.md +++ b/docs/upload.md @@ -14,9 +14,10 @@ The types of files supported are: * PCAP files (of mime type `application/vnd.tcpdump.pcap` or `application/x-pcapng`) - PCAPNG files are *partially* supported: Zeek is able to process PCAPNG files, but not all of Arkime's packet examination features work correctly -* Zeek logs in archive files (`application/gzip`, `application/x-gzip`, `application/x-7z-compressed`, `application/x-bzip2`, `application/x-cpio`, `application/x-lzip`, `application/x-lzma`, `application/x-rar-compressed`, `application/x-tar`, `application/x-xz`, or `application/zip`) +* Zeek logs (with a `.log` file extension) in archive files (`application/gzip`, `application/x-gzip`, `application/x-7z-compressed`, `application/x-bzip2`, `application/x-cpio`, `application/x-lzip`, `application/x-lzma`, `application/x-rar-compressed`, `application/x-tar`, `application/x-xz`, or `application/zip`) - because log fields may differ depending on Zeek's configuration, users are recommended to use [Zeek JSON format logs](https://docs.zeek.org/en/master/log-formats.html#zeek-json-format-logs) when generating Zeek logs outside of Malcolm to later be uploaded to Malcolm for procesing - where the Zeek logs are found in the internal directory structure in the archive file does not matter +* Microsoft Windows [event log files](https://learn.microsoft.com/en-us/windows/win32/eventlog/event-log-file-format) (with a `.evtx` file extension) uploaded directly or in archive files (`application/gzip`, `application/x-gzip`, `application/x-7z-compressed`, `application/x-bzip2`, `application/x-cpio`, `application/x-lzip`, `application/x-lzma`, `application/x-rar-compressed`, `application/x-tar`, `application/x-xz`, or `application/zip`) Files uploaded via these methods are monitored and moved automatically to other directories for processing, generally within 1 minute of completion of the upload. diff --git a/filebeat/filebeat-logs.yml b/filebeat/filebeat-logs.yml index f7b1bf181..5add7ad71 100644 --- a/filebeat/filebeat-logs.yml +++ b/filebeat/filebeat-logs.yml @@ -102,6 +102,24 @@ filebeat.inputs: close_eof: false clean_removed: ${FILEBEAT_CLEAN_REMOVED:true} +#-------------------------- Uploaded Windows EVTX Logs (as JSON) --------------- +# (see evtx_to_jsonl.sh) +- type: log + paths: + - ${FILEBEAT_ZEEK_LOG_PATH:/zeek/current}/*.evtx.json + symlinks: true + fields_under_root: true + tags: ["_malcolm_beats","_evtx_to_json"] + compression_level: 0 + scan_frequency: ${FILEBEAT_SCAN_FREQUENCY:10s} + clean_inactive: ${FILEBEAT_CLEAN_INACTIVE:180m} + ignore_older: ${FILEBEAT_IGNORE_OLDER:120m} + close_inactive: ${FILEBEAT_CLOSE_INACTIVE:120s} + close_renamed: ${FILEBEAT_CLOSE_RENAMED:true} + close_removed: ${FILEBEAT_CLOSE_REMOVED:true} + close_eof: ${FILEBEAT_CLOSE_EOF:true} + clean_removed: ${FILEBEAT_CLEAN_REMOVED:true} + #================================ Outputs ====================================== #-------------------------- Logstash Output ------------------------------------ diff --git a/filebeat/scripts/clean-processed-folder.py b/filebeat/scripts/clean-processed-folder.py index 776a1eb3d..19be4ade8 100755 --- a/filebeat/scripts/clean-processed-folder.py +++ b/filebeat/scripts/clean-processed-folder.py @@ -31,7 +31,7 @@ nowTime = time.time() logMimeTypeRegex = re.compile(r"(text/plain|application/(x-nd)?json)") archiveMimeTypeRegex = re.compile( - r"(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip)" + r"(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip|application/x-ms-evtx)" ) diff --git a/filebeat/scripts/filebeat-process-zeek-folder.sh b/filebeat/scripts/filebeat-process-zeek-folder.sh index b7528cb05..72a46e366 100755 --- a/filebeat/scripts/filebeat-process-zeek-folder.sh +++ b/filebeat/scripts/filebeat-process-zeek-folder.sh @@ -38,7 +38,7 @@ if mkdir $LOCKDIR; then # get new logs ready for processing cd "$ZEEK_LOGS_DIR" - find . -path ./processed -prune -o -path ./current -prune -o -path ./upload -prune -o -path ./extract_files -prune -o -path ./live -prune -o -type f -exec file --separator '|' --mime-type "{}" \; | grep -P "(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip|application/x-ms-evtx)" | sort -V | \ + find . -path ./processed -prune -o -path ./current -prune -o -path ./upload -prune -o -path ./extract_files -prune -o -path ./live -prune -o -type f -exec file --separator '|' --mime-type "{}" \; | grep -P "(application/gzip|application/x-gzip|application/x-7z-compressed|application/x-bzip2|application/x-cpio|application/x-lzip|application/x-lzma|application/x-rar-compressed|application/x-tar|application/x-xz|application/zip|application/x-ms-evtx|application/octet-stream)" | sort -V | \ xargs -n 1 -P $FILEBEAT_PREPARE_PROCESS_COUNT -I '{}' bash -c ' # separate filename and mime type @@ -50,60 +50,89 @@ if mkdir $LOCKDIR; then FILEMIME="${FILEMIME#"${FILEMIME%%[![:space:]]*}"}" FILEMIME="${FILEMIME%"${FILEMIME##*[![:space:]]}"}" - fuser -s "$FILENAME" 2>/dev/null - if [[ $? -ne 0 ]] - then - . $SCRIPT_DIR/filebeat-process-zeek-folder-functions.sh - - PROCESS_TIME=$(date +%s%N) - SOURCEDIR="$(dirname "$FILENAME")" - DESTDIR="./processed/$SOURCEDIR" - DESTNAME="$DESTDIR/$(basename "$FILENAME")" - DESTDIR_EXTRACTED="${DESTNAME}_${PROCESS_TIME}" - LINKDIR="./current" - USERTAG=false - - TAGS=() - IFS=",-/_." read -r -a SOURCESPLIT <<< $(echo "$FILENAME" | sed "s/\.[^.]*$//") - echo "\"$FILENAME\" -> \"${DESTNAME}\"" - for index in "${!SOURCESPLIT[@]}" - do - TAG_CANDIDATE="${SOURCESPLIT[index]}" - if ! in_array TAGS "$TAG_CANDIDATE"; then - if [[ "$TAG_CANDIDATE" = "USERTAG" ]]; then - USERTAG=true - elif [[ -n $TAG_CANDIDATE && ! $TAG_CANDIDATE =~ ^[0-9-]+$ && $TAG_CANDIDATE != "tar" && $TAG_CANDIDATE != "AUTOZEEK" && ! $TAG_CANDIDATE =~ ^AUTOCARVE ]]; then - TAGS+=("${TAG_CANDIDATE}") + # PITA... The version of the "file" utility in the filebeat container + # gives "application/octet-stream" instead of "application/x-ms-evtx" + # for Windows .evtx files. + # A similar check exists in filebeat-watch-zeeklogs-uploads-folder.py. + if [[ "$FILEMIME" == "application/octet-stream" ]]; then + if [[ "$(file --brief "${FILENAME}" 2>/dev/null)" == *"Windows"*"Event Log"* ]]; then + # hard-code based on the non-mime file output + FILEMIME="application/x-ms-evtx" + else + # ignore this file, we really do not want it + FILEMIME= + fi + fi + + if [[ -f "$FILENAME" ]] && [[ -n "$FILEMIME" ]]; then + fuser -s "$FILENAME" 2>/dev/null + if [[ $? -ne 0 ]] + then + . $SCRIPT_DIR/filebeat-process-zeek-folder-functions.sh + + PROCESS_TIME=$(date +%s%N) + SOURCEDIR="$(dirname "$FILENAME")" + DESTDIR="./processed/$SOURCEDIR" + DESTNAME="$DESTDIR/$(basename "$FILENAME")" + DESTDIR_EXTRACTED="${DESTNAME}_${PROCESS_TIME}" + LINKDIR="./current" + USERTAG=false + + TAGS=() + IFS=",-/_." read -r -a SOURCESPLIT <<< $(echo "$FILENAME" | sed "s/\.[^.]*$//") + echo "\"$FILENAME\" -> \"${DESTNAME}\"" + for index in "${!SOURCESPLIT[@]}" + do + TAG_CANDIDATE="${SOURCESPLIT[index]}" + if ! in_array TAGS "$TAG_CANDIDATE"; then + if [[ "$TAG_CANDIDATE" = "USERTAG" ]]; then + USERTAG=true + elif [[ -n $TAG_CANDIDATE && ! $TAG_CANDIDATE =~ ^[0-9-]+$ && $TAG_CANDIDATE != "tar" && $TAG_CANDIDATE != "AUTOZEEK" && ! $TAG_CANDIDATE =~ ^AUTOCARVE ]]; then + TAGS+=("${TAG_CANDIDATE}") + fi fi + done + + if [[ "$ZEEK_LOG_AUTO_TAG" != "true" ]] && [[ "$USERTAG" != "true" ]]; then + TAGS=() fi - done - if [[ "$ZEEK_LOG_AUTO_TAG" != "true" ]] && [[ "$USERTAG" != "true" ]]; then - TAGS=() - fi + mkdir -p "$DESTDIR" + mkdir -p "$DESTDIR_EXTRACTED" - mkdir -p "$DESTDIR" - mkdir -p "$DESTDIR_EXTRACTED" + if [[ "$FILEMIME" == "application/x-ms-evtx" ]]; then + # special case for Windows event log files that are uploaded uncompressed + mv -v "$FILENAME" "$DESTDIR_EXTRACTED"/"$(basename "$DESTNAME")" + else + # extract archive to DESTDIR_EXTRACTED + mv -v "$FILENAME" "$DESTNAME" + python3 -m pyunpack.cli "$DESTNAME" "$DESTDIR_EXTRACTED" + fi - if [[ "$FILEMIME" == "application/x-ms-evtx" ]]; then - # special case for Windows event log files that are uploaded uncompressed - # TODO: temporary - rm -vf "$FILENAME" - else - mv -v "$FILENAME" "$DESTNAME" - python3 -m pyunpack.cli "$DESTNAME" "$DESTDIR_EXTRACTED" - fi + ZEEK_LOG_EXT=log + EVTX_LOG_EXT=evtx + pwd + while IFS="" read -r -d "" LOGFILE; do + PROCESS_TIME=$(date +%s%N) + TAGS_JOINED=$(printf "%s," "${TAGS[@]}")${PROCESS_TIME} + LOGFILE_EXT="${LOGFILE##*.}" + if [[ "${LOGFILE_EXT}" == "${EVTX_LOG_EXT}" ]]; then + # convert evtx file to one-event-per-line JSON file + /usr/local/bin/evtx_to_jsonl.sh "$LOGFILE" + LOGFILE+=.json + LINKNAME_BASE="$(basename "$LOGFILE" ."${LOGFILE_EXT}".json)" + LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED}).${LOGFILE_EXT}.json" + else + LINKNAME_BASE="$(basename "$LOGFILE" ."${LOGFILE_EXT}")" + LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED}).${LOGFILE_EXT}" + fi + touch "$LOGFILE" + ln -sfr "$LOGFILE" "$LINKDIR/$LINKNAME" + done < <(find "${DESTDIR_EXTRACTED}" -type f "(" -name "*.${ZEEK_LOG_EXT}" -o -name "*.${EVTX_LOG_EXT}" ")" -printf "%p\0" 2>/dev/null) - find "$DESTDIR_EXTRACTED" -type f -name "*.log" | while read LOGFILE - do - PROCESS_TIME=$(date +%s%N) - TAGS_JOINED=$(printf "%s," "${TAGS[@]}")${PROCESS_TIME} - LINKNAME_BASE="$(basename "$LOGFILE" .log)" - LINKNAME="${LINKNAME_BASE}(${TAGS_JOINED}).log" - touch "$LOGFILE" - ln -sfr "$LOGFILE" "$LINKDIR/$LINKNAME" - done - fi + + fi # fuser says the file is not in use + fi # FILENAME and FILEMIME are good ' fi diff --git a/filebeat/scripts/filebeat-watch-zeeklogs-uploads-folder.py b/filebeat/scripts/filebeat-watch-zeeklogs-uploads-folder.py index 7cfafa519..864ce28d8 100755 --- a/filebeat/scripts/filebeat-watch-zeeklogs-uploads-folder.py +++ b/filebeat/scripts/filebeat-watch-zeeklogs-uploads-folder.py @@ -15,6 +15,7 @@ import magic import os import pathlib +import re import shutil import signal import sys @@ -44,9 +45,16 @@ 'application/zip', # windows event logs (idaholab/Malcolm#465) will be handled here as well, as they # may be uploaded either as-is or compressed - 'application.evtx', + 'application/x-ms-evtx', ] +# PITA... The version of the "file" utility in the filebeat container +# gives "application/octet-stream" instead of "application/x-ms-evtx" +# for Windows .evtx files. +# A similar check exists in filebeat-process-zeek-folder.sh +SUPPORTED_FILE_TYPE_REGEXES = [ + r'Windows.*Event Log', +] ################################################################################################### # handle sigint/sigterm and set a global shutdown variable @@ -58,6 +66,7 @@ def shutdown_handler(signum, frame): ################################################################################################### def file_processor(pathname, **kwargs): mime_types = kwargs["mime_types"] + file_type_regexes = kwargs["file_types"] uid = kwargs["uid"] gid = kwargs["gid"] destination = kwargs["destination"] @@ -72,8 +81,9 @@ def file_processor(pathname, **kwargs): # get the file magic mime type fileMime = magic.from_file(pathname, mime=True) + fileType = magic.from_file(pathname) - if fileMime in mime_types: + if (fileMime in mime_types) or any([re.search(reg, fileType, re.IGNORECASE) for reg in file_type_regexes]): # looks like this is a compressed file (or evtx file), we're assuming it's: # * a zeek log archive to be processed by filebeat # * a windows event log archive to be processed into JSON and then also sent through filebeat @@ -248,6 +258,7 @@ def main(): "uid": args.chownUid, "gid": args.chownGid, "mime_types": SUPPORTED_MIME_TYPES, + "file_types": SUPPORTED_FILE_TYPE_REGEXES, }, args.assumeClosedSec, shuttingDown, diff --git a/hedgehog-iso/config/includes.chroot/opt/zeek/share/zeek/site/extractor_params.zeek b/hedgehog-iso/config/includes.chroot/opt/zeek/share/zeek/site/extractor_params.zeek index 9e1325fa0..4aeca1da6 100644 --- a/hedgehog-iso/config/includes.chroot/opt/zeek/share/zeek/site/extractor_params.zeek +++ b/hedgehog-iso/config/includes.chroot/opt/zeek/share/zeek/site/extractor_params.zeek @@ -623,6 +623,7 @@ export { ["application/x-mmxp"] = "mxp", ["application/x-mobipocket-ebook"] = "mobi", ["application/x-ms-application"] = "application", + ["application/x-ms-evtx"]= "evtx", ["application/x-ms-installer"] = "msi", ["application/x-ms-license"] = "slupkg-ms", ["application/x-ms-manifest"] = "manifest", diff --git a/hedgehog-iso/interface/sensor_ctl/zeek/extractor_override.interesting.zeek b/hedgehog-iso/interface/sensor_ctl/zeek/extractor_override.interesting.zeek index 314d4c5a9..3055ed92f 100644 --- a/hedgehog-iso/interface/sensor_ctl/zeek/extractor_override.interesting.zeek +++ b/hedgehog-iso/interface/sensor_ctl/zeek/extractor_override.interesting.zeek @@ -86,6 +86,7 @@ export { ["application/x-install-instructions"]= "install", ["application/x-lzh-compressed"]= "lzh", ["application/x-ms-application"]= "application", + ["application/x-ms-evtx"]= "evtx", ["application/x-ms-installer"]= "msi", ["application/x-ms-shortcut"]= "lnk", ["application/x-msdos-program"]= "exe", diff --git a/logstash/pipelines/enrichment/23_severity.conf b/logstash/pipelines/enrichment/23_severity.conf index 27d65747a..3c28a2d2a 100644 --- a/logstash/pipelines/enrichment/23_severity.conf +++ b/logstash/pipelines/enrichment/23_severity.conf @@ -164,6 +164,7 @@ filter { ("application/x-gzip" in [file][mime_type]) or ("application/x-install-instructions" in [file][mime_type]) or ("application/x-lzh-compressed" in [file][mime_type]) or + ("application/x-ms-evtx" in [file][mime_type]) or ("application/x-ms-installer" in [file][mime_type]) or ("application/x-ms-shortcut" in [file][mime_type]) or ("application/x-msdownload" in [file][mime_type]) or diff --git a/logstash/pipelines/output/98_finalize.conf b/logstash/pipelines/output/98_finalize.conf index 4a2580c64..3365b269e 100644 --- a/logstash/pipelines/output/98_finalize.conf +++ b/logstash/pipelines/output/98_finalize.conf @@ -5,6 +5,7 @@ filter { mutate { id => "mutate_final_tags_remove" remove_tag => [ "_dateparsefailure", "_dissectfailure", + "_evtx_to_json", "_filebeat_suricata", "_filebeat_suricata_hedgehog_live", "_filebeat_suricata_live", diff --git a/pcap-monitor/scripts/watch-pcap-uploads-folder.py b/pcap-monitor/scripts/watch-pcap-uploads-folder.py index 280735605..e3f21aed9 100755 --- a/pcap-monitor/scripts/watch-pcap-uploads-folder.py +++ b/pcap-monitor/scripts/watch-pcap-uploads-folder.py @@ -82,7 +82,7 @@ def file_processor(pathname, **kwargs): 'application/zip', # windows event logs (idaholab/Malcolm#465) will be handled here as well, as they # may be uploaded either as-is or compressed - 'application.evtx', + 'application/x-ms-evtx', ] ): # looks like this is a compressed file (or evtx file), we're assuming it's: diff --git a/shared/bin/evtx_to_jsonl.sh b/shared/bin/evtx_to_jsonl.sh index fd3221e82..4d4f89538 100755 --- a/shared/bin/evtx_to_jsonl.sh +++ b/shared/bin/evtx_to_jsonl.sh @@ -1,17 +1,35 @@ #!/bin/bash +################################################################################################### + if ! command -v jq >/dev/null 2>&1 || ! command -v evtx >/dev/null 2>&1; then echo "$(basename "${BASH_SOURCE[0]}") requires jq and evtx" >&2 exit 1 fi +################################################################################################### + +# set -x VERBOSE=0 -DELETE_SRC=0 -while getopts vd opts; do +# delete source evtx file only if conversion worked +DELETE_SRC_ON_SUCCESS=0 + +# delete source evtx file regardless of conversion success +DELETE_SRC_FORCE=0 + +while getopts vdf opts; do case ${opts} in - v) VERBOSE=1 ;; - d) DELETE_SRC=1 ;; + v) + VERBOSE=1 + ;; + d) + DELETE_SRC_ON_SUCCESS=1 + ;; + f) + DELETE_SRC_ON_SUCCESS=1 + DELETE_SRC_FORCE=1 + ;; esac done shift "$(($OPTIND -1))" @@ -20,30 +38,55 @@ if [[ "${VERBOSE}" == "1" ]]; then set -x fi +################################################################################################### +# processFile - convert a single evtx file to JSON + function processFile() { local FNAME="$(realpath "$1")" - local FNAME_JSON="${FNAME}.json" if [[ -f "${FNAME}" ]]; then + + # output filespec is input filespec with .evtx.json suffix + local FNAME_JSON="${FNAME}" + [[ "${FNAME_JSON}" == *.evtx ]] || FNAME_JSON+=.evtx + FNAME_JSON+=.json + evtx \ --threads 1 \ --format jsonl \ --no-confirm-overwrite \ --output "${FNAME_JSON}" \ - "${FNAME}" && \ - [[ "${DELETE_SRC}" == "1" ]] && \ - rm -f "${FNAME}" - fi -} + "${FNAME}" + EVTX_EXIT_CODE=$? + + # delete input file if specified + ( ( [[ "${EVTX_EXIT_CODE}" == "0" ]] && [[ "${DELETE_SRC_ON_SUCCESS}" == "1" ]] ) || \ + [[ "${DELETE_SRC_FORCE}" == "1" ]] ) && \ + rm -f "${FNAME}" + + # massage output + if [[ -f "${FNAME_JSON}" ]]; then + true + fi + + fi # [[ -f "${FNAME}" ]] +} # processFile + +################################################################################################### +# process all input arguments (besides getopts) as evtx file to convert to JSON for INPUT in "$@"; do if [[ -d "${INPUT}" ]]; then + # argument represents a directory containing evtx files (nested arbitrarily deep), process each while IFS='' read -r -d '' INPUT_FILE; do processFile "${INPUT_FILE}" done < <(find "${INPUT}" -xdev -ignore_readdir_race -type f -printf '%p\0' 2>/dev/null | sort -z 2>/dev/null) elif [[ -f "${INPUT}" ]]; then + # argument represents a single evtx file, process it processFile "${INPUT}" fi -done +done # for INPUT in "$@"; do + +################################################################################################### if [[ "${VERBOSE}" == "1" ]]; then set +x diff --git a/zeek/config/extractor_override.interesting.zeek b/zeek/config/extractor_override.interesting.zeek index 314d4c5a9..acbdb0943 100644 --- a/zeek/config/extractor_override.interesting.zeek +++ b/zeek/config/extractor_override.interesting.zeek @@ -86,6 +86,7 @@ export { ["application/x-install-instructions"]= "install", ["application/x-lzh-compressed"]= "lzh", ["application/x-ms-application"]= "application", + ["application/x-ms-evtx"] = "evtx", ["application/x-ms-installer"]= "msi", ["application/x-ms-shortcut"]= "lnk", ["application/x-msdos-program"]= "exe", diff --git a/zeek/config/extractor_params.zeek b/zeek/config/extractor_params.zeek index 9e1325fa0..cd9a37513 100644 --- a/zeek/config/extractor_params.zeek +++ b/zeek/config/extractor_params.zeek @@ -623,6 +623,7 @@ export { ["application/x-mmxp"] = "mxp", ["application/x-mobipocket-ebook"] = "mobi", ["application/x-ms-application"] = "application", + ["application/x-ms-evtx"] = "evtx", ["application/x-ms-installer"] = "msi", ["application/x-ms-license"] = "slupkg-ms", ["application/x-ms-manifest"] = "manifest",