From ad19730a4f94855c2ba2972565326909b5f8b8e4 Mon Sep 17 00:00:00 2001 From: Seth Grover Date: Tue, 26 Apr 2022 12:31:07 -0600 Subject: [PATCH] having temporarily backported the patch from arkime/arkime#1877 into my build to specify event.dataset to arkime's capture --- Dockerfiles/arkime.Dockerfile | 3 ++ README.md | 2 +- arkime/etc/config.ini | 2 +- arkime/etc/user_settings.json | 1 + arkime/patch/capture_event_dataset.patch | 34 +++++++++++++++++++ sensor-iso/build.sh | 1 + .../interface/sensor_ctl/control_vars.conf | 1 + .../sensor_ctl/supervisor.d/arkime.conf | 1 + shared/bin/pcap_arkime_and_zeek_processor.py | 4 +++ 9 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 arkime/patch/capture_event_dataset.patch diff --git a/Dockerfiles/arkime.Dockerfile b/Dockerfiles/arkime.Dockerfile index 977f6527b..08cf6591d 100644 --- a/Dockerfiles/arkime.Dockerfile +++ b/Dockerfiles/arkime.Dockerfile @@ -63,6 +63,7 @@ RUN apt-get -q update && \ sed -i '/./,$!d' README.md && \ sed -i "s/.png/.jpg/g" README.md && \ sed -i "s@docs/images@images@g" README.md && \ + sed -i 's/\!\[.*\](.*\/badge.svg)//g' README.md && \ pandoc -s --self-contained --metadata title="Malcolm README" --css $ARKIMEDIR/doc/doc.css -o $ARKIMEDIR/doc/README.html $ARKIMEDIR/doc/README.md && \ cd /opt && \ git clone --depth=1 --single-branch --recurse-submodules --shallow-submodules --no-tags --branch="v$ARKIME_VERSION" "$ARKIME_URL" "./arkime-"$ARKIME_VERSION && \ @@ -112,6 +113,7 @@ ARG OS_HOST=opensearch ARG OS_PORT=9200 ARG MALCOLM_USERNAME=admin ARG ARKIME_ECS_PROVIDER=arkime +ARG ARKIME_ECS_DATASET=session ARG ARKIME_INTERFACE=eth0 ARG ARKIME_ANALYZE_PCAP_THREADS=1 ARG WISE=off @@ -134,6 +136,7 @@ ENV MALCOLM_USERNAME $MALCOLM_USERNAME # this needs to be present, but is unused as nginx is going to handle auth for us ENV ARKIME_PASSWORD "ignored" ENV ARKIME_ECS_PROVIDER $ARKIME_ECS_PROVIDER +ENV ARKIME_ECS_DATASET $ARKIME_ECS_DATASET ENV ARKIMEDIR "/opt/arkime" ENV 
ARKIME_ANALYZE_PCAP_THREADS $ARKIME_ANALYZE_PCAP_THREADS ENV WISE $WISE diff --git a/README.md b/README.md index 8fb1f3246..dcff1b2f4 100644 --- a/README.md +++ b/README.md @@ -998,7 +998,7 @@ A stock installation of Arkime extracts all of its network connection ("session" In this way, when full packet capture is an option, analysis of PCAP files can be enhanced by the additional information Zeek provides. When full packet capture is not an option, similar analysis can still be performed using the same interfaces and processes using the Zeek logs alone. -One value of particular mention is **Zeek Log Type** (`event.dataset` in OpenSearch). This value corresponds to the kind of Zeek `.log` file from which the record was created. In other words, a search could be restricted to records from `conn.log` by searching `event.dataset == conn`, or restricted to records from `weird.log` by searching `event.dataset == weird`. In this same way, to view *only* records from Zeek logs (excluding any from PCAP files), use the special Arkime `EXISTS` filter, as in `event.dataset == EXISTS!`. On the other hand, to exclude Zeek logs and only view Arkime Sessions, use `fileId != EXISTS!`. +A few values of particular mention include **Data Source** (`event.provider` in OpenSearch), which can be used to distinguish among the sources of the network traffic metadata record (e.g., `zeek` for Zeek logs and `arkime` for Arkime sessions); and **Log Type** (`event.dataset` in OpenSearch), which corresponds to the kind of Zeek `.log` file from which the record was created. In other words, a search could be restricted to records from `conn.log` by searching `event.provider == zeek && event.dataset == conn`, or restricted to records from `weird.log` by searching `event.provider == zeek && event.dataset == weird`. 
Click the icon of the owl **🦉** in the upper-left hand corner of to access the Arkime usage documentation (accessible at [https://localhost/help](https://localhost/help) if you are connecting locally), click the **Fields** label in the navigation pane, then search for `zeek` to see a list of the other Zeek log types and fields available to Malcolm. diff --git a/arkime/etc/config.ini b/arkime/etc/config.ini index 38dfa7ed8..8bb3c4bf9 100644 --- a/arkime/etc/config.ini +++ b/arkime/etc/config.ini @@ -33,7 +33,7 @@ dropGroup=arkime # The userAutoCreateTmpl should more or less match what's in /etc/user_settings.json # which is what's used when creating the default admin user. userNameHeader=http_auth_http_user -userAutoCreateTmpl={"userId": "${this.http_auth_http_user}", "userName": "${this.http_auth_http_user}", "enabled": true, "createEnabled": false, "webEnabled": true, "headerAuthEnabled": true, "emailSearch": true, "removeEnabled": false, "packetSearch": true, "hideStats": false, "hideFiles": false, "hidePcap": false, "disablePcapDownload": false, "settings": { "timezone": "local", "detailFormat": "last", "showTimestamps": "last", "sortColumn": "start", "sortDirection": "desc", "spiGraph": "protocol", "connSrcField": "source.ip", "connDstField": "destination.ip", "numPackets": "last", "theme" : "custom1: #222222,#E2E2E2,#FFFFFF,#00789E,#004A79,#017D73,#092B40,#42b7c5,#2A7580,#ecb30a,#333333,#89ADCC,#6D6D6D,#FFE7E7,#ECFEFF", "manualQuery": false }, "views": { "Public IP Addresses": { "expression": "(country.dst == EXISTS!) || (country.src == EXISTS!) || (ip.dst == EXISTS! 
&& ip.dst != 0.0.0.0/8 && ip.dst != 10.0.0.0/8 && ip.dst != 100.64.0.0/10 && ip.dst != 127.0.0.0/8 && ip.dst != 169.254.0.0/16 && ip.dst != 172.16.0.0/12 && ip.dst != 192.0.0.0/24 && ip.dst != 192.0.2.0/24 && ip.dst != 192.88.99.0/24 && ip.dst != 192.168.0.0/16 && ip.dst != 198.18.0.0/15 && ip.dst != 198.51.100.0/24 && ip.dst != 203.0.113.0/24 && ip.dst != 224.0.0.0/4 && ip.dst != 232.0.0.0/8 && ip.dst != 233.0.0.0/8 && ip.dst != 234.0.0.0/8 && ip.dst != 239.0.0.0/8 && ip.dst != 240.0.0.0/4 && ip.dst != 255.255.255.255 && ip.dst != :: && ip.dst != ::1 && ip.dst != ff00::/8 && ip.dst != fe80::/10 && ip.dst != fc00::/7 && ip.dst != fd00::/8) || (ip.src == EXISTS! && ip.src != 0.0.0.0/8 && ip.src != 10.0.0.0/8 && ip.src != 100.64.0.0/10 && ip.src != 127.0.0.0/8 && ip.src != 169.254.0.0/16 && ip.src != 172.16.0.0/12 && ip.src != 192.0.0.0/24 && ip.src != 192.0.2.0/24 && ip.src != 192.88.99.0/24 && ip.src != 192.168.0.0/16 && ip.src != 198.18.0.0/15 && ip.src != 198.51.100.0/24 && ip.src != 203.0.113.0/24 && ip.src != 224.0.0.0/4 && ip.src != 232.0.0.0/8 && ip.src != 233.0.0.0/8 && ip.src != 234.0.0.0/8 && ip.src != 239.0.0.0/8 && ip.src != 240.0.0.0/4 && ip.src != 255.255.255.255 && ip.src != :: && ip.src != ::1 && ip.src != ff00::/8 && ip.src != fe80::/10 && ip.src != fc00::/7 && ip.src != fd00::/8)" }, "Arkime Sessions": { "expression": "event.provider == arkime" }, "Zeek Logs": { "expression": "event.provider == zeek" }, "Zeek conn.log": { "expression": "event.provider == zeek && event.dataset == conn" }, "Zeek Exclude conn.log": { "expression": "event.provider == zeek && event.dataset != conn" } }, "tableStates": { "sessionsNew": { "order": [ [ "firstPacket", "desc" ] ], "visibleHeaders": [ "protocol", "event.dataset", "firstPacket", "lastPacket", "src", "source.port", "dst", "destination.port", "network.packets", "dbby", "tags", "info" ] } } } +userAutoCreateTmpl={"userId": "${this.http_auth_http_user}", "userName": "${this.http_auth_http_user}", "enabled": true, 
"createEnabled": false, "webEnabled": true, "headerAuthEnabled": true, "emailSearch": true, "removeEnabled": false, "packetSearch": true, "hideStats": false, "hideFiles": false, "hidePcap": false, "disablePcapDownload": false, "settings": { "timezone": "local", "detailFormat": "last", "showTimestamps": "last", "sortColumn": "start", "sortDirection": "desc", "spiGraph": "protocol", "connSrcField": "source.ip", "connDstField": "destination.ip", "numPackets": "last", "theme" : "custom1: #222222,#E2E2E2,#FFFFFF,#00789E,#004A79,#017D73,#092B40,#42b7c5,#2A7580,#ecb30a,#333333,#89ADCC,#6D6D6D,#FFE7E7,#ECFEFF", "manualQuery": false }, "views": { "Public IP Addresses": { "expression": "(country.dst == EXISTS!) || (country.src == EXISTS!) || (ip.dst == EXISTS! && ip.dst != 0.0.0.0/8 && ip.dst != 10.0.0.0/8 && ip.dst != 100.64.0.0/10 && ip.dst != 127.0.0.0/8 && ip.dst != 169.254.0.0/16 && ip.dst != 172.16.0.0/12 && ip.dst != 192.0.0.0/24 && ip.dst != 192.0.2.0/24 && ip.dst != 192.88.99.0/24 && ip.dst != 192.168.0.0/16 && ip.dst != 198.18.0.0/15 && ip.dst != 198.51.100.0/24 && ip.dst != 203.0.113.0/24 && ip.dst != 224.0.0.0/4 && ip.dst != 232.0.0.0/8 && ip.dst != 233.0.0.0/8 && ip.dst != 234.0.0.0/8 && ip.dst != 239.0.0.0/8 && ip.dst != 240.0.0.0/4 && ip.dst != 255.255.255.255 && ip.dst != :: && ip.dst != ::1 && ip.dst != ff00::/8 && ip.dst != fe80::/10 && ip.dst != fc00::/7 && ip.dst != fd00::/8) || (ip.src == EXISTS! 
&& ip.src != 0.0.0.0/8 && ip.src != 10.0.0.0/8 && ip.src != 100.64.0.0/10 && ip.src != 127.0.0.0/8 && ip.src != 169.254.0.0/16 && ip.src != 172.16.0.0/12 && ip.src != 192.0.0.0/24 && ip.src != 192.0.2.0/24 && ip.src != 192.88.99.0/24 && ip.src != 192.168.0.0/16 && ip.src != 198.18.0.0/15 && ip.src != 198.51.100.0/24 && ip.src != 203.0.113.0/24 && ip.src != 224.0.0.0/4 && ip.src != 232.0.0.0/8 && ip.src != 233.0.0.0/8 && ip.src != 234.0.0.0/8 && ip.src != 239.0.0.0/8 && ip.src != 240.0.0.0/4 && ip.src != 255.255.255.255 && ip.src != :: && ip.src != ::1 && ip.src != ff00::/8 && ip.src != fe80::/10 && ip.src != fc00::/7 && ip.src != fd00::/8)" }, "Arkime Sessions": { "expression": "event.provider == arkime" }, "Zeek Logs": { "expression": "event.provider == zeek" }, "Zeek conn.log": { "expression": "event.provider == zeek && event.dataset == conn" }, "Zeek Exclude conn.log": { "expression": "event.provider == zeek && event.dataset != conn" } }, "tableStates": { "sessionsNew": { "order": [ [ "firstPacket", "desc" ] ], "visibleHeaders": [ "protocol", "event.provider", "event.dataset", "firstPacket", "lastPacket", "src", "source.port", "dst", "destination.port", "network.packets", "dbby", "tags", "info" ] } } } parseSMTP=true parseSMB=true parseQSValue=false diff --git a/arkime/etc/user_settings.json b/arkime/etc/user_settings.json index 3bc070ffc..5912073c1 100644 --- a/arkime/etc/user_settings.json +++ b/arkime/etc/user_settings.json @@ -51,6 +51,7 @@ ], "visibleHeaders": [ "protocol", + "event.provider", "event.dataset", "firstPacket", "lastPacket", diff --git a/arkime/patch/capture_event_dataset.patch b/arkime/patch/capture_event_dataset.patch new file mode 100644 index 000000000..a8862c7d6 --- /dev/null +++ b/arkime/patch/capture_event_dataset.patch @@ -0,0 +1,34 @@ +diff --git a/capture/db.c b/capture/db.c +index 2515b177..0a6d06bd 100644 +--- a/capture/db.c ++++ b/capture/db.c +@@ -71,6 +71,7 @@ LOCAL int dbExit; + LOCAL char *esBulkQuery; + LOCAL int 
esBulkQueryLen; + LOCAL char *ecsEventProvider; ++LOCAL char *ecsEventDataset; + + extern uint64_t packetStats[MOLOCH_PACKET_MAX]; + +@@ -906,8 +907,12 @@ void moloch_db_save_session(MolochSession_t *session, int final) + } + BSB_EXPORT_cstr(jbsb, "],"); + +- if (ecsEventProvider) { ++ if (ecsEventProvider && ecsEventDataset) { ++ BSB_EXPORT_sprintf(jbsb, "\"event\":{\"provider\":\"%s\", \"dataset\":\"%s\"},", ecsEventProvider, ecsEventDataset); ++ } else if (ecsEventProvider) { + BSB_EXPORT_sprintf(jbsb, "\"event\":{\"provider\":\"%s\"},", ecsEventProvider); ++ } else if (ecsEventDataset) { ++ BSB_EXPORT_sprintf(jbsb, "\"event\":{\"dataset\":\"%s\"},", ecsEventDataset); + } + + int inGroupNum = 0; +@@ -2661,6 +2666,7 @@ void moloch_db_init() + } + + ecsEventProvider = moloch_config_str(NULL, "ecsEventProvider", NULL); ++ ecsEventDataset = moloch_config_str(NULL, "ecsEventDataset", NULL); + + int thread; + for (thread = 0; thread < config.packetThreads; thread++) { diff --git a/sensor-iso/build.sh b/sensor-iso/build.sh index 771ebd8e9..46319338d 100755 --- a/sensor-iso/build.sh +++ b/sensor-iso/build.sh @@ -148,6 +148,7 @@ if [ -d "$WORKDIR" ]; then sed -i "s/.png/.jpg/g" HedgehogLinux.jpg.md sed -i "s@/docs/logo/@/docs/images/@g" HedgehogLinux.jpg.md sed -i "s/^# Hedgehog Linux$//" HedgehogLinux.jpg.md + sed -i 's/\!\[.*\](.*\/badge.svg)//g' HedgehogLinux.jpg.md pandoc -s --self-contained --metadata title="Hedgehog Linux" --css doc.css -o HedgehogLinux.html HedgehogLinux.jpg.md rm -f HedgehogLinux.jpg.md popd >/dev/null 2>&1 diff --git a/sensor-iso/interface/sensor_ctl/control_vars.conf b/sensor-iso/interface/sensor_ctl/control_vars.conf index bf84187fc..2a64a4091 100644 --- a/sensor-iso/interface/sensor_ctl/control_vars.conf +++ b/sensor-iso/interface/sensor_ctl/control_vars.conf @@ -14,6 +14,7 @@ export ARKIME_VIEWER_PORT=8005 export ARKIME_PACKET_THREADS=5 export ARKIME_PACKET_ACL= export ARKIME_ECS_PROVIDER=arkime +export ARKIME_ECS_DATASET=session export 
PROTOLOGBEAT_PORT=9515 export PROTOLOGBEAT_INTERVAL=10 diff --git a/sensor-iso/interface/sensor_ctl/supervisor.d/arkime.conf b/sensor-iso/interface/sensor_ctl/supervisor.d/arkime.conf index 3096a4478..0b919080c 100644 --- a/sensor-iso/interface/sensor_ctl/supervisor.d/arkime.conf +++ b/sensor-iso/interface/sensor_ctl/supervisor.d/arkime.conf @@ -29,6 +29,7 @@ command=/opt/arkime/bin/capture %(ENV_ARKIME_HTTPS_FLAG)s -o parsersDir=/opt/arkime/parsers -o pluginsDir=/opt/arkime/plugins -o ecsEventProvider="%(ENV_ARKIME_ECS_PROVIDER)s" + -o ecsEventDataset="%(ENV_ARKIME_ECS_DATASET)s" --node "%(ENV_ARKIME_NODE_NAME)s" --host "%(ENV_ARKIME_NODE_HOST)s" startsecs=30 diff --git a/shared/bin/pcap_arkime_and_zeek_processor.py b/shared/bin/pcap_arkime_and_zeek_processor.py index 61e03d7ba..a1e02b89f 100755 --- a/shared/bin/pcap_arkime_and_zeek_processor.py +++ b/shared/bin/pcap_arkime_and_zeek_processor.py @@ -60,6 +60,7 @@ shuttingDown = False scanWorkersCount = AtomicInt(value=0) arkimeProvider = os.getenv('ARKIME_ECS_PROVIDER', 'arkime') +arkimeDataset = os.getenv('ARKIME_ECS_DATASET', 'session') ################################################################################################### # handle sigint/sigterm and set a global shutdown variable @@ -91,6 +92,7 @@ def arkimeCaptureFileWorker(arkimeWorkerArgs): global shuttingDown global scanWorkersCount global arkimeProvider + global arkimeDataset scanWorkerId = scanWorkersCount.increment() # unique ID for this thread @@ -140,6 +142,8 @@ def arkimeCaptureFileWorker(arkimeWorkerArgs): '--quiet', '-o', f'ecsEventProvider={arkimeProvider}', + '-o', + f'ecsEventDataset={arkimeDataset}', '-r', fileInfo[FILE_INFO_DICT_NAME], ]