From b0743c6c350d99c98e366e24bc672dd9b841420c Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:42:30 +0000 Subject: [PATCH 01/23] init revamp --- Dockerfile | 61 ++---- configextractor_.py | 276 --------------------------- configextractor_/configextractor_.py | 81 ++++++++ configextractor_/update_server.py | 87 +++++++++ service_manifest.yml | 42 +++- 5 files changed, 214 insertions(+), 333 deletions(-) delete mode 100755 configextractor_.py create mode 100755 configextractor_/configextractor_.py create mode 100644 configextractor_/update_server.py diff --git a/Dockerfile b/Dockerfile index 12e4bf6..2bf8b55 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch AS base -ENV SERVICE_PATH configextractor_.ConfigExtractor +ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.0 USER assemblyline -RUN pip uninstall -y yara-python +#RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl && rm -rf /var/lib/apt/lists/* @@ -15,60 +15,33 @@ FROM base AS build RUN apt-get update && apt-get install -y git libssl-dev libmagic-dev automake libtool make gcc wget libjansson-dev pkg-config && rm -rf /var/lib/apt/lists/* # Compile and install YARA -RUN wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v$YARA_VERSION.tar.gz +RUN wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v${YARA_VERSION}.tar.gz RUN tar -zxf /tmp/yara.tar.gz -C /tmp -WORKDIR /tmp/yara-$YARA_VERSION +WORKDIR /tmp/yara-${YARA_VERSION} RUN ./bootstrap.sh -RUN ./configure --enable-cuckoo --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install +RUN ./configure --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install RUN make RUN make install + # Build the yara python plugins, install other dependencies USER assemblyline RUN touch /tmp/before-pip -# Get ConfigExtractor library -RUN git clone --recurse-submodules https://github.com/CybercentreCanada/configextractor-py.git /tmp/configextractor-py -RUN pip install --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" yara-python==$YARA_VERSION -RUN pip install --no-cache-dir --user --use-deprecated=legacy-resolver \ - gitpython plyara /tmp/configextractor-py/RATDecoders/ /tmp/configextractor-py/ - -RUN git clone https://github.com/kevoreilly/CAPEv2.git /tmp/CAPEv2 - -# Install packages required for CAPE's cuckoo library -RUN pip install chardet pyzipper \ - && rm -rf ~/.cache/pip - -# Remove disabled/test parsers -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/*.py_disabled -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/test_cape.py -# Remove 'bad' parsers -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/LokiBot.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/GuLoader.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/SquirrelWaffle.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/BuerLoader.py - -RUN mkdir -p /tmp/al_service/CAPEv2/modules/processing/parsers/CAPE/ -RUN cp -r /tmp/CAPEv2/modules/processing/parsers/CAPE/* /tmp/al_service/CAPEv2/modules/processing/parsers/CAPE/ -RUN mkdir -p /tmp/al_service/CAPEv2/lib -RUN cp -r /tmp/CAPEv2/lib/* /tmp/al_service/CAPEv2/lib/ - -RUN rm -rf /tmp/CAPEv2 +# Get ConfigExtractor library +RUN pip install -U git+https://github.com/CybercentreCanada/configextractor-py@revamp +RUN pip install -U 
git+https://github.com/CybercentreCanada/maco -# # Remove files that existed before the pip install so that our copy command below doesn't take a snapshot of -# # files that already exist in the base image -# RUN find /var/lib/assemblyline/.local -type f ! -newer /tmp/before-pip -delete +RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git +RUN pip install --no-cache-dir --user gitpython plyara markupsafe==2.0.1 -# # Switch back to root and change the ownership of the files to be copied due to bitbucket pipeline uid nonsense -# USER root -# RUN chown root:root -R /var/lib/assemblyline/.local +# Public libraries that can be used by parsers +RUN pip install --no-cache-dir --user netstruct beautifulsoup4 pyOpenSSL # Revert back to before the compile FROM base COPY --from=build /tmp/yara_install /usr/local -COPY --from=build /tmp/configextractor-py/dependencies /opt/al_service/dependencies -COPY --from=build /tmp/al_service/CAPEv2/ /opt/al_service/CAPEv2 COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local /var/lib/assemblyline/.local # Create directories @@ -79,10 +52,6 @@ RUN mkdir -p /opt/al_service WORKDIR /opt/al_service COPY . . -# Make sure we actually have the right version of pyparsing by uninstalling it as root -# then later reinstalling an exact version as the user account -RUN pip uninstall --yes pyparsing flask - # Cleanup RUN rm ./Dockerfile @@ -91,11 +60,7 @@ RUN chown -R assemblyline /opt/al_service # Patch version in manifest ARG version=4.0.0.dev1 -ENV PUBLIC_SERVICE_VERSION=$version -ENV CAPE_PARSERS_DIR=/opt/al_service/CAPEv2/modules/processing/parsers/CAPE/ -ENV PYTHONPATH=$PYTHONPATH:/opt/al_service/CAPEv2/ RUN sed -i -e "s/\$SERVICE_TAG/$version/g" service_manifest.yml # Switch to assemblyline user USER assemblyline -RUN pip install --user pyparsing==2.3.0 flask~=1.1.0 diff --git a/configextractor_.py b/configextractor_.py deleted file mode 100755 index 7af5317..0000000 --- a/configextractor_.py +++ /dev/null @@ -1,276 +0,0 @@ -import ast -from configextractor import cli -import json -import tempfile -import os -import re -from mwcp.metadata import File as file_meta_obj - -from assemblyline.common import forge -from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI -from assemblyline.odm.models.tagging import Tagging -from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT -from typing import List -from urllib3.util import parse_url - -cl_engine = forge.get_classification() - -HEURISTICS_MAP = {"malware": 1, "safe": 2} -# This dict contains fields that we care about, and the corresponding tag if they exist -FIELD_TAG_MAP = { - 'address': 'network.dynamic.uri', - 'c2_address': 'network.dynamic.uri', - 'c2_url': 'network.dynamic.uri', - 'credential': None, - 'directory': 'file.path', - 'email_address': None, - 'event': None, - 'filename': 'file.path', - 'filepath': 'file.path', - 'ftp': None, - 'guid': None, - 'injectionprocess': None, - 'interval': None, - 'key': None, - 'listenport': None, - 'missionid': None, - 'mutex': 'dynamic.mutex', - 'outputfile': None, - 'password': 'file.string.extracted', - 'pipe': None, - 'proxy': None, - 'proxy_address': None, - 'registrydata': None, - 'registrypath': 'dynamic.registry_key', - 'registrypathdata': None, - 'rsa_private_key': None, - 'rsa_public_key': None, - 'service': None, - 
'servicedescription': None, - 'servicedisplayname': None, - 'servicedll': None, - 'serviceimage': None, - 'servicename': None, - 'ssl_cert_sha1': None, - 'url': 'network.dynamic.uri', - 'urlpath': None, - 'useragent': None, - 'username': 'file.string.extracted', - 'version': 'file.pe.versions.description' -} - - -class ConfigExtractor(ServiceBase): - def __init__(self, config=None): - super(ConfigExtractor, self).__init__(config) - self.file_parsers = {} - self.tag_parsers = None - self.parser_classification = [] # default should be the classification set for the service. - cli.ROOT_DIR = '/opt/al_service/dependencies/' - cli.init_root_dependencies() - cli.load_parsers() - - def start(self): - yara_externals = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()} - yara_externals.update( - { - "al_file_rule_yara": "" - } - ) - file_parsers, tag_parsers = cli.compile(yara_externals) - self.log.info(f"loaded {list(file_parsers.keys())}") - cli.validate_parser_config() - self.file_parsers = file_parsers - self.tag_parsers = tag_parsers - - def execute(self, request): - mwcp_report = cli.register() - result = Result() - # Run Ratdecoders - output = cli.run_ratdecoders(request.file_path, mwcp_report) - if type(output) is str: - if "error" in output: - self.log.warning(output) - else: - self.log.debug(output) - output = "" - if type(output) is dict: - self.log.debug(output) - for parser, fields in output.items(): - self.section_builder(parser, fields, result, "RATDecoder") - - tags = {f"al_{k.replace('.', '_')}": i for k, i in request.task.tags.items()} - newtags = {} - # yara externals must be dicts w key value pairs being strings - for k, v in tags.items(): - key = f"al_{k.replace('.', '_')}" - for i in range(len(v)): - if not isinstance(v[i], str): - v[i] = str(v[i]) - value = " | ".join(v) - newtags[key] = value - # get matches for both, dedup then run - cli.run_mwcfg(request.file_path, mwcp_report) - if request.get_param('use_cape'): - cli.run_cape(request.file_path, mwcp_report) - - # Handle metadata from mwcp_report, generate section - metadata = mwcp_report.metadata - metadata.pop('debug', []) # Dumped as separate file - metadata.pop('other', []) # Another section displays 'other' data - if metadata: - meta_section = ResultSection('MWCP Metadata', body=json.dumps( - metadata), body_format=BODY_FORMAT.JSON) - tags = dict() - for field, data in metadata.items(): - if FIELD_TAG_MAP.get(field): - if FIELD_TAG_MAP[field].startswith('network'): - tag_network_ioc(meta_section, data) - else: - [meta_section.add_tag(FIELD_TAG_MAP[field], d) for d in data] - - if meta_section.tags: - result.add_section(meta_section) - - parsers = cli.deduplicate(self.file_parsers, self.tag_parsers, request.file_path, newtags) - output_fields, reports = cli.run(parsers, request.file_path) - for parser, field_dict in output_fields.items(): - self.section_builder(parser, field_dict, result) - for report in reports: - for metadata_list in report._metadata.values(): - for meta in metadata_list: - if isinstance(meta, file_meta_obj): - with tempfile.NamedTemporaryFile(dir=self.working_directory, delete=False) as tmp_file: - tmp_file.write(meta.data) - tmp_file.seek(0) - request.add_supplementary(tmp_file.name, f"{meta.md5[:5]}_{meta.name}", meta.description) - fd, temp_path = tempfile.mkstemp(dir=self.working_directory) - if output or output_fields: - with os.fdopen(fd, "w") as myfile: - myfile.write(json.dumps(output)) - myfile.write(json.dumps(output_fields)) - request.add_supplementary(temp_path, 
"output.json", "This is MWCP output as a JSON file") - request.result = result - - def section_builder(self, parser, field_dict, result, parsertype="MWCP"): - json_body = {} - malware_name = '' - malware_types = [] - mitre_group = '' - mitre_att = '' - category = 'malware' - # get malware names from parser objects - if parsertype == "RATDecoder": - malware_name = parser - if parsertype == "MWCP": - for name, obj in self.file_parsers.items(): - if parser in obj.parser_list: - malware_name = obj.malware - malware_types = obj.malware_types if isinstance(obj.malware_types, list) else [obj.malware_types] - mitre_att = obj.mitre_att - mitre_group = obj.mitre_group - category = obj.category - for item in ['classification', 'mitre_group', 'mitre_att', - 'malware', 'malware_types', 'category']: - val = getattr(obj, item, None) - if val: - json_body[item] = val - break - parser_section = ResultSection(f"{parsertype} : {parser}") - - parser_section = classification_checker(parser_section, parser, self.file_parsers) - if len(field_dict) > 0: # if any decoder output exists raise heuristic - parser_section.set_body(json.dumps(json_body), body_format=BODY_FORMAT.KEY_VALUE) - parser_section.set_heuristic(HEURISTICS_MAP.get(category, 1), attack_id=mitre_att) - parser_section.add_tag("source", f"{parsertype}.{parser}") - - if malware_name: - parser_section.add_tag('attribution.implant', malware_name.upper()) - if mitre_group: - parser_section.add_tag('attribution.actor', mitre_group.upper()) - for malware_type in malware_types: - parser_section.add_tag('attribution.family', malware_type.upper()) - # Create subsections and attach them to the main parser_section - subsection_builder(parser_section, field_dict) - - other_key = "other" - if other_key in field_dict: - other_content = field_dict[other_key] - other_section = ResultSection("Other metadata found", body_format=BODY_FORMAT.KEY_VALUE, - body=json.dumps(other_content)) - parser_section.add_subsection(other_section) - - for field in field_dict: - if field != other_key and field not in FIELD_TAG_MAP: - self.log.debug(f"{field} does not exist in FIELD_TAG_MAP") - result.add_section(parser_section) - - -def classification_checker(res_section, parser_name, file_parsers): - for name, parser_obj in file_parsers.items(): - if name == parser_name: - res_section.classification = cl_engine.normalize_classification(parser_obj.classification) - return res_section - - -def subsection_builder(parent_section: ResultSection = None, fields: dict = {}): - for mwcp_field, mwcp_field_data in fields.items(): - if mwcp_field in FIELD_TAG_MAP and mwcp_field_data != ['-']: - tag = FIELD_TAG_MAP[mwcp_field] - table_body = [] - table_section = ResultSection(f"Extracted {mwcp_field.capitalize()}") - - # Make sure data isn't a string representation of a list - for index, data in enumerate(mwcp_field_data): - if isinstance(data, str) and all(symbol in data for symbol in ['[', ']']): - mwcp_field_data.remove(data) - for x in ast.literal_eval(data): - mwcp_field_data.append(x) - - if tag: - # Was a URL/URI tagged? 
- if 'uri' in tag: - tag_network_ioc(table_section, mwcp_field_data) - else: - for x in mwcp_field_data: - table_section.add_tag(tag, x) - # Tag everything that we can - # Add data to section body - for line in mwcp_field_data: - if type(line) is str: - table_body.append({mwcp_field: line}) - elif type(line) is list: - for item in line: - table_body.append({mwcp_field: item}) - table_section.set_body(body_format=BODY_FORMAT.TABLE, body=json.dumps(table_body)) - - parent_section.add_subsection(table_section) - - -def tag_network_ioc(section: ResultSection, dataset: List[str]) -> None: - if not section.heuristic: - # Heuristic should only be applied once - section.set_heuristic(3) - for data in dataset: - # Tests indicated the possibilty of nested lists - main_tag = None - if isinstance(data, list): - tag_network_ioc(section, data) - elif re.match(IP_ONLY_REGEX, data): - main_tag = 'network.dynamic.ip' - elif re.match(FULL_URI, data): - main_tag = 'network.dynamic.uri' - # Deconstruct the raw data to additional tagging - parsed_uri = parse_url(data) - if parsed_uri.host: - # tag_reducer will de-dup IP being tagged twice - host_tag = 'network.dynamic.ip' if re.match( - IP_ONLY_REGEX, parsed_uri.host) else 'network.dynamic.domain' - section.add_tag(host_tag, parsed_uri.host) - if parsed_uri.port: - section.add_tag('network.port', parsed_uri.port) - if parsed_uri.path: - section.add_tag('network.dynamic.uri_path', parsed_uri.path) - if main_tag: - section.add_tag(main_tag, data) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py new file mode 100755 index 0000000..bebe6ed --- /dev/null +++ b/configextractor_/configextractor_.py @@ -0,0 +1,81 @@ +from collections import defaultdict + +from assemblyline.common import forge +from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX +from assemblyline.odm.models.tagging import Tagging +from assemblyline_v4_service.common.base import ServiceBase +from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT + +import json +import hashlib +import os +import regex + +from configextractor.main import ConfigExtractor as CX + + +cl_engine = forge.get_classification() + + +class ConfigExtractor(ServiceBase): + def __init__(self, config=None): + super(ConfigExtractor, self).__init__(config) + self.cx = None + + # Generate the rules_hash and init rules_list based on the raw files in the rules_directory from updater + def _gen_rules_hash(self) -> str: + self.rules_list = [] + for obj in os.listdir(self.rules_directory): + obj_path = os.path.join(self.rules_directory, obj) + if os.path.isdir(obj_path): + self.rules_list.append(obj_path) + all_sha256s = [f for f in self.rules_list] + + if len(all_sha256s) == 1: + return all_sha256s[0][:7] + + return hashlib.sha256(' '.join(sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7] + + def _load_rules(self) -> None: + if self.rules_list: + self.log.debug(self.rules_list) + blocklist = [] + blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') + if os.path.exists(blocklist_location): + for line in open(blocklist_location, 'r').readlines(): + _, source, _, parser_name = line.split('_') + blocklist.append(rf"*{parser_name}$") + self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') + self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) + + if not self.cx: + raise Exception("Unable to start ConfigExtractor because can't find directory containing parsers") 
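+        # Even when parser directories exist, CX can load zero parsers (e.g. if every parser was blocklisted), so the empty parser list is checked separately below.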
+ + if not self.cx.parsers: + raise Exception( + f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") + + def tag_output(self, output: dict, tags: dict = {}): + for value in output.values(): + if isinstance(value, dict): + self.tag_output(value, tags) + elif isinstance(value, list): + for v in value: + self.tag_output(value, tags) + + if isinstance(value, str): + if regex.search(IP_ONLY_REGEX, value): + tags['network.static.ip'].append(value) + elif regex.search(DOMAIN_ONLY_REGEX, value): + tags['network.static.domain'].append(value) + elif regex.search(FULL_URI, value): + tags['network.static.uri'].append(value) + + def execute(self, request): + result = Result() + config_result = self.cx.run_parsers(request.file_path) + tags = defaultdict(list) + self.tag_output(config_result, tags) + result.add_section(ResultSection('Output', body=json.dumps(config_result), + body_format=BODY_FORMAT.JSON, tags=tags)) + request.result = result diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py new file mode 100644 index 0000000..a91de00 --- /dev/null +++ b/configextractor_/update_server.py @@ -0,0 +1,87 @@ +import os +import shutil +import tempfile + +from assemblyline.common import forge +from assemblyline.common.isotime import epoch_to_iso +from assemblyline.odm.models.signature import Signature +from assemblyline_client import get_client +from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER +from configextractor.main import ConfigExtractor + + +classification = forge.get_classification() + + +class CXUpdateServer(ServiceUpdater): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def import_update(self, files_sha256, client, source_name, default_classification=classification.UNRESTRICTED): + def import_parsers(cx: ConfigExtractor): + upload_list = list() + parser_paths = cx.parsers.keys() + for parser_path in parser_paths: + parser_details = cx.get_details(parser_path) + if parser_details: + upload_list.append(Signature(dict( + classification=parser_details['classification'] or default_classification, + data=open(parser_path, 'r').read(), + name=parser_details['name'], + signature_id=f"{parser_details['framework']}_{os.path.basename(parser_path)}", + source=source_name, + type='configextractor', + status="DEPLOYED", + )).as_primitives()) + return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False) + + for dir, _ in files_sha256: + # Remove cached duplicates + dir = dir[:-1] + self.log.info(dir) + cx = ConfigExtractor(parsers_dir=dir, logger=self.log, check_extension=True) + resp = import_parsers(cx) + self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.debug(resp) + + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers + try: + shutil.move(dir, os.path.join(self.latest_updates_dir, source_name)) + except shutil.Error as e: + if 'already exists' in str(e): + continue + raise e + + def do_local_update(self) -> None: + old_update_time = self.get_local_update_time() + if not os.path.exists(UPDATER_DIR): + os.makedirs(UPDATER_DIR) + + _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) + self.log.info("Setup service account.") + username = self.ensure_service_account() + self.log.info("Create temporary API key.") + with temporary_api_key(self.datastore,
username) as api_key: + self.log.info(f"Connecting to Assemblyline API: {UI_SERVER}") + al_client = get_client(UI_SERVER, apikey=(username, api_key), verify=False) + + # Check if new signatures have been added + self.log.info("Check for new signatures.") + if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', + sig_type=self.updater_type)['update_available']: + self.log.info("An update is available for download from the datastore") + self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}") + + blocklisted_parsers = list() + [blocklisted_parsers.extend(list(item.values())) + for item in al_client.search.signature(f'type:{self.updater_type} AND status:DISABLED', + fl='id')['items']] + self.log.debug(f'Blocking the following parsers: {blocklisted_parsers}') + output_directory = self.prepare_output_directory() + open(os.path.join(output_directory, 'blocked_parsers'), 'w').write('\n'.join(blocklisted_parsers)) + self.serve_directory(output_directory, time_keeper) + + +if __name__ == '__main__': + with CXUpdateServer() as server: + server.serve_forever() diff --git a/service_manifest.yml b/service_manifest.yml index 374b7a6..6227725 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -17,20 +17,14 @@ timeout: 10 disable_cache: false -enabled: true +# Disabled at first so the Administrator can assign appropriate storage class +enabled: false is_external: false licence_count: 0 -uses_tags: true - -submission_params: - # Use CAPE parsers - - name: use_cape - type: bool - value: false - default: false +uses_tags: false # Service configuration block (dictionary of config variables) heuristics: @@ -59,3 +53,33 @@ docker_config: image: ${REGISTRY}cccs/assemblyline-service-configextractor:$SERVICE_TAG cpu_cores: 1.0 ram_mb: 1024 + +dependencies: + updates: + container: + allow_internet_access: true + command: ["python", "-m", "configextractor_.update_server"] + image: ${REGISTRY}cccs/assemblyline-service-configextractor:$SERVICE_TAG + ports: ["5003"] + environment: + - name: UPDATER_DIR + value: /mnt/updates + + run_as_core: True + volumes: + updates: + mount_path: /mnt/updates + capacity: 5120 + storage_class: default + + +update_config: + generates_signatures: true + sources: + - name: CAPE + pattern: .*/modules/processing/parsers/CAPE/$ + uri: https://github.com/kevoreilly/CAPEv2.git + classification: TLP:W + update_interval_seconds: 21600 # Quarter-day (every 6 hours) + wait_for_update: true + signature_delimiter: file From dacc6d9774ec653afa4fb1bf1d0948c993b88baa Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:44:25 +0000 Subject: [PATCH 02/23] fix classification setting for source --- service_manifest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service_manifest.yml b/service_manifest.yml index 6227725..2969e1c 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -79,7 +79,7 @@ update_config: - name: CAPE pattern: .*/modules/processing/parsers/CAPE/$ uri: https://github.com/kevoreilly/CAPEv2.git - classification: TLP:W + default_classification: TLP:W update_interval_seconds: 21600 # Quarter-day (every 6 hours) wait_for_update: true signature_delimiter: file From 25be8ff439c2361a43fc9c66925ae52ef80ba86f Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:49:50 +0000 Subject: [PATCH 03/23] remove testing for now --- pipelines/azure-build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/azure-build.yaml
b/pipelines/azure-build.yaml index 0a8a036..0ad07be 100644 --- a/pipelines/azure-build.yaml +++ b/pipelines/azure-build.yaml @@ -27,9 +27,9 @@ stages: if [[ "$TAG" == *stable* ]]; then export BUILD_TYPE=stable; else export BUILD_TYPE=latest; fi docker build --build-arg version=$TAG --build-arg branch=$BUILD_TYPE -t cccs/assemblyline-service-configextractor:$TAG -t cccs/assemblyline-service-configextractor:$BUILD_TYPE -f ./Dockerfile . displayName: Build containers - - script: | - docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest' - displayName: Test containers + # - script: | + # docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest' + # displayName: Test containers - script: | docker push cccs/assemblyline-service-configextractor --all-tags displayName: Deploy to Docker Hub From a8f6287ae101e11f306eaec2fed5cf5255f25c26 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 15:00:37 +0000 Subject: [PATCH 04/23] fix kwarg on update --- configextractor_/update_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index a91de00..4a97574 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -39,7 +39,7 @@ def import_parsers(cx: ConfigExtractor): # Remove cached duplicates dir = dir[:-1] self.log.info(dir) - cx = ConfigExtractor(parsers_dir=dir, logger=self.log, check_extension=True) + cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log, check_extension=True) resp = import_parsers(cx) self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") self.log.debug(resp) From 3366f0cfe8e4199337363d886943c006a63b6391 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 15:43:31 +0000 Subject: [PATCH 05/23] only split on the first 3 --- configextractor_/configextractor_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index bebe6ed..4c77a40 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -43,7 +43,7 @@ def _load_rules(self) -> None: blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): - _, source, _, parser_name = line.split('_') + _, source, _, parser_name = line.split('_', 3) blocklist.append(rf"*{parser_name}$") self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) From 1cd34bad8d1eed09053201f3f859a36b4bd8b56f Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 15:54:29 +0000 Subject: [PATCH 06/23] fix regex for blocklist --- configextractor_/configextractor_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 4c77a40..d74e317 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -44,7 +44,7 @@ def _load_rules(self) -> None: if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) -
blocklist.append(rf"*{parser_name}$") + blocklist.append(rf".*{parser_name}$") self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) From b288d18dea7136a536561b74554aa46ada223f8a Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 17:32:50 +0000 Subject: [PATCH 07/23] make tagging more robust --- configextractor_/configextractor_.py | 34 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index d74e317..1144a3c 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -1,4 +1,5 @@ from collections import defaultdict +from typing import Any from assemblyline.common import forge from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX @@ -55,21 +56,28 @@ def _load_rules(self) -> None: raise Exception( f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") - def tag_output(self, output: dict, tags: dict = {}): - for value in output.values(): - if isinstance(value, dict): - self.tag_output(value, tags) - elif isinstance(value, list): - for v in value: + # Temporary tagging method until CAPE is switched over to MACO modelling + def tag_output(self, output: Any, tags: dict = {}): + def tag_string(value): + if regex.search(IP_ONLY_REGEX, value): + tags['network.static.ip'].append(value) + elif regex.search(DOMAIN_ONLY_REGEX, value): + tags['network.static.domain'].append(value) + elif regex.search(FULL_URI, value): + tags['network.static.uri'].append(value) + + if isinstance(output, dict): + # Iterate over valuse of dictionary + for value in output.values(): + if isinstance(value, dict): self.tag_output(value, tags) + elif isinstance(value, list): + [self.tag_output(v, tags) for v in value] + elif isinstance(value, str): + tag_string(value) - if isinstance(value, str): - if regex.search(IP_ONLY_REGEX, value): - tags['network.static.ip'].append(value) - elif regex.search(DOMAIN_ONLY_REGEX, value): - tags['network.static.domain'].append(value) - elif regex.search(FULL_URI, value): - tags['network.static.uri'].append(value) + elif isinstance(value, str): + tag_string(value) def execute(self, request): result = Result() From f5eba84a4ce53ce571f22a595dc9691534618077 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 17:55:04 +0000 Subject: [PATCH 08/23] fix copy-paste --- configextractor_/configextractor_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 1144a3c..793ff12 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -76,8 +76,8 @@ def tag_string(value): elif isinstance(value, str): tag_string(value) - elif isinstance(value, str): - tag_string(value) + elif isinstance(output, str): + tag_string(output) def execute(self, request): result = Result() From 989e0bffe9b334b5a82d369aea9fd7e9a9de3977 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 15 Aug 2022 13:01:05 +0000 Subject: [PATCH 09/23] Handle updating sources better; misc. 
changes --- Dockerfile | 8 ++++---- configextractor_/update_server.py | 29 ++++++++++++++++++----------- service_manifest.yml | 6 +++--- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2bf8b55..6669d06 100755 --- a/Dockerfile +++ b/Dockerfile @@ -2,13 +2,13 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch AS base ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor -ENV YARA_VERSION=4.2.0 +ENV YARA_VERSION=4.2.3 USER assemblyline -#RUN pip uninstall -y yara-python +RUN pip uninstall -y yara-python USER root -RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* # Create a temporary image to do our compiling in FROM base AS build @@ -32,7 +32,7 @@ RUN touch /tmp/before-pip RUN pip install -U git+https://github.com/CybercentreCanada/configextractor-py@revamp RUN pip install -U git+https://github.com/CybercentreCanada/maco -RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git +RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git@d29ca083f4cb25ea52988314b844bb7cf8594b5b RUN pip install --no-cache-dir --user gitpython plyara markupsafe==2.0.1 # Public libraries that can be used by parsers diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 4a97574..58d4187 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -21,6 +21,7 @@ def import_update(self, files_sha256, client, source_name, default_classificatio def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() + self.log.debug(f"Importing following parsers: {parser_paths}") for parser_path in parser_paths: parser_details = cx.get_details(parser_path) if parser_details: @@ -39,18 +40,24 @@ def import_parsers(cx: ConfigExtractor): # Remove cached duplicates dir = dir[:-1] self.log.info(dir) - cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log, check_extension=True) - resp = import_parsers(cx) - self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") - self.log.debug(resp) + cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log) + if cx.parsers: + self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") + resp = import_parsers(cx) + self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.debug(resp) - # Save a local copy of the directory that may potentially contain dependency libraries for the parsers - try: - shutil.move(dir, os.path.join(self.latest_updates_dir, source_name)) - except shutil.Error as e: - if 'already exists' in str(e): - continue - raise e + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers + try: + destination = os.path.join(self.latest_updates_dir, source_name) + # Removing old version of directory if exists + if os.path.exists(destination): + shutil.rmtree(destination) + shutil.move(dir, destination) + except shutil.Error as e: + if 'already exists' in str(e): + continue + raise e def do_local_update(self) -> None: old_update_time =
self.get_local_update_time() diff --git a/service_manifest.yml b/service_manifest.yml index 2969e1c..9f458c6 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -69,16 +69,16 @@ dependencies: volumes: updates: mount_path: /mnt/updates - capacity: 5120 + capacity: 1048576 #1Gi storage_class: default - update_config: generates_signatures: true sources: + # Pending: https://github.com/kevoreilly/CAPEv2/pull/1037 - name: CAPE pattern: .*/modules/processing/parsers/CAPE/$ - uri: https://github.com/kevoreilly/CAPEv2.git + uri: https://github.com/cccs-rs/CAPEv2.git default_classification: TLP:W update_interval_seconds: 21600 # Quarter-day (every 6 hours) wait_for_update: true From 28e3f4a7149d76d4308b2623be0e7280008340a1 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 16 Aug 2022 18:15:54 +0000 Subject: [PATCH 10/23] Update heuristics; Network IOCs section --- configextractor_/configextractor_.py | 59 +++++++++++++++++++++++++--- service_manifest.yml | 22 ++++++----- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 793ff12..3cf0714 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -5,18 +5,21 @@ from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX from assemblyline.odm.models.tagging import Tagging from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT +from assemblyline_v4_service.common.result import Result, ResultSection, ResultTableSection, BODY_FORMAT, TableRow, Heuristic import json import hashlib import os import regex +import tempfile from configextractor.main import ConfigExtractor as CX - +from maco.model import ExtractorModel, ConnUsageEnum cl_engine = forge.get_classification() +CONNECTION_USAGE = [k.name for k in ConnUsageEnum] + class ConfigExtractor(ServiceBase): def __init__(self, config=None): @@ -79,11 +82,55 @@ def tag_string(value): elif isinstance(output, str): tag_string(output) + def network_ioc_section(self, config) -> ResultSection: + network_section = ResultSection("Network IOCs") + + network_fields = { + 'ftp': ExtractorModel.FTP, + 'smtp': ExtractorModel.SMTP, + 'http': ExtractorModel.Http, + 'ssh': ExtractorModel.SSH, + 'proxy': ExtractorModel.Proxy, + 'dns': ExtractorModel.DNS, + 'tcp': ExtractorModel.Connection, + 'udp': ExtractorModel.Connection + } + for field, model in network_fields.items(): + sorted_network_config = {} + for network_config in config.get(field, []): + sorted_network_config.setdefault(network_config.get('usage', 'other'), []).append(network_config) + + if sorted_network_config: + connection_section = ResultSection(field.upper(), parent=network_section) + for usage, connections in sorted_network_config.items(): + tags = list() + self.tag_output(connections, tags) + table_section = ResultTableSection(title_text=f"Usage: {usage.upper()} x{len(connections)}", parent=connection_section, heuristic=Heuristic(2, signature=usage), tags=tags) + [table_section.add_row(TableRow(**model(**c).dict())) for c in connections] + + if network_section.subsections: + return network_section + def execute(self, request): result = Result() config_result = self.cx.run_parsers(request.file_path) - tags = defaultdict(list) - self.tag_output(config_result, tags) - result.add_section(ResultSection('Output', body=json.dumps(config_result), - body_format=BODY_FORMAT.JSON, tags=tags)) + if not config_result: + 
request.result = result + return + + a = tempfile.NamedTemporaryFile(delete=False) + a.write(json.dumps(config_result).encode()) + a.seek(0) + request.add_supplementary(a.name, f"{request.sha256}_malware_config.json", "Raw output from configextractor-py") + for parser_framework, parser_results in config_result.items(): + framework_section = ResultSection(parser_framework, parent=result, auto_collapse=True) + for parser_name, parser_output in parser_results.items(): + config = parser_output.pop('config') + parser_output['family'] = config.pop('family') + parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE) + network_section = self.network_ioc_section(config) + if network_section: + parser_section.add_subsection(network_section) + + request.result = result diff --git a/service_manifest.yml b/service_manifest.yml index 9f458c6..fa34b04 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -33,18 +33,22 @@ heuristics: score: 1000 filetype: "*" description: Category - Malware - Indicates configuration block was extracted - - heur_id: 2 - name: Safe - score: -1000 - filetype: "*" - description: Category - Safe - - - heur_id: 3 name: De-obfuscated Network IOCs - score: 1000 + score: 50 filetype: "*" - description: Category - Malicious - Indicates a network IOC was extracted from malware configuration + signature_score_map: + # Connection usage may be indicative of maliciousness + c2: 1000 + upload: 1000 + download: 1000 + propagate: 1000 + tunnel: 1000 + ransom: 1000 + decoy: 10 # Used to mask actual malicious connection but the connections themselves aren't malicious + other: 10 + + description: Indicates a network IOC was extracted from malware configuration # Docker configuration block which defines: # - the name of the docker container that will be created From 3fcf1e0adf80898b2d7c45383e4b4979976e2207 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 16:40:34 +0000 Subject: [PATCH 11/23] Setup linking from results to signatures --- Dockerfile | 4 +++- configextractor_/configextractor_.py | 8 +++++++- configextractor_/update_server.py | 24 ++++++++++++++++++------ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6669d06..8d04c93 100755 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.3 USER assemblyline -RUN pip uninstall -y yara-python +#RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* @@ -47,6 +47,8 @@ COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local # Create directories RUN mkdir -p /mount/updates RUN mkdir -p /opt/al_service +RUN mkdir -p /updates +RUN chown -R assemblyline:assemblyline /updates # Copy service code WORKDIR /opt/al_service diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 3cf0714..bf82417 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -25,6 +25,7 @@ class ConfigExtractor(ServiceBase): def __init__(self, config=None): super(ConfigExtractor, self).__init__(config) self.cx = None + self.source_map = None # Generate the rules_hash and init rules_list based on the raw files in the rules_directory from updater def _gen_rules_hash(self) -> str: @@ -45,6 +46,7 @@ def _load_rules(self) -> None: 
self.log.debug(self.rules_list) blocklist = [] blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') + self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) @@ -127,7 +129,11 @@ def execute(self, request): for parser_name, parser_output in parser_results.items(): config = parser_output.pop('config') parser_output['family'] = config.pop('family') - parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE) + id = f'{parser_framework}_{parser_name}' + id_details = self.source_map[id] + parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), + parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE, + tags={'file.rule.configextractor': f"{id_details['source_name']}.{parser_name}"}, classification=id_details['classification']) network_section = self.network_ioc_section(config) if network_section: parser_section.add_subsection(network_section) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 58d4187..0ca5b07 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -1,4 +1,5 @@ import os +import json import shutil import tempfile @@ -12,7 +13,6 @@ classification = forge.get_classification() - class CXUpdateServer(ServiceUpdater): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -22,19 +22,23 @@ def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() self.log.debug(f"Importing following parsers: {parser_paths}") + source_map = {} for parser_path in parser_paths: parser_details = cx.get_details(parser_path) if parser_details: + id = f"{parser_details['framework']}_{parser_details['name']}" + classification = parser_details['classification'] or default_classification + source_map[id] = dict(classification=classification, source_name=source_name) upload_list.append(Signature(dict( - classification=parser_details['classification'] or default_classification, + classification=classification, data=open(parser_path, 'r').read(), name=parser_details['name'], - signature_id=f"{parser_details['framework']}_{os.path.basename(parser_path)}", + signature_id=id, source=source_name, type='configextractor', status="DEPLOYED", )).as_primitives()) - return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False) + return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False), source_map for dir, _ in files_sha256: # Remove cached duplicates dir = dir[:-1] self.log.info(dir) cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log) if cx.parsers: self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") - resp = import_parsers(cx) + resp, source_map = import_parsers(cx) self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") self.log.debug(resp) + self.log.debug(source_map) # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) + source_mapping_file = os.path.join(self.latest_updates_dir, 'source_mapping.json') # Removing old version of directory if exists if
os.path.exists(destination): shutil.rmtree(destination) shutil.move(dir, destination) + if os.path.exists(source_mapping_file): + _tmp = json.loads(open(source_mapping_file, 'r').read()) + _tmp.update(source_map) + source_map = _tmp + + open(source_mapping_file, 'w').write(json.dumps(source_map)) except shutil.Error as e: if 'already exists' in str(e): continue @@ -64,7 +76,6 @@ def do_local_update(self) -> None: if not os.path.exists(UPDATER_DIR): os.makedirs(UPDATER_DIR) - _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) self.log.info("Setup service account.") username = self.ensure_service_account() self.log.info("Create temporary API key.") @@ -76,6 +87,7 @@ def do_local_update(self) -> None: self.log.info("Check for new signatures.") if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', sig_type=self.updater_type)['update_available']: + _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) self.log.info("An update is available for download from the datastore") self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}") From d504b4db0787fa4c5874fee31d15a425e96e36f0 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 16:43:39 +0000 Subject: [PATCH 12/23] undo dev changes in Dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8d04c93..6669d06 100755 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.3 USER assemblyline -#RUN pip uninstall -y yara-python +RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* @@ -47,8 +47,6 @@ COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local # Create directories RUN mkdir -p /mount/updates RUN mkdir -p /opt/al_service -RUN mkdir -p /updates -RUN chown -R assemblyline:assemblyline /updates # Copy service code WORKDIR /opt/al_service From 73b6d4314d658aa05a7ccf8deafe3685a3d2b6af Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 17:49:57 +0000 Subject: [PATCH 13/23] Allow updater to fetch python packages for service --- configextractor_/configextractor_.py | 5 +++++ configextractor_/update_server.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index bf82417..ca40572 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -11,6 +11,7 @@ import hashlib import os import regex +import sys import tempfile from configextractor.main import ConfigExtractor as CX @@ -47,6 +48,10 @@ def _load_rules(self) -> None: blocklist = [] blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) + python_packages_dir = os.path.join(self.rules_directory, 'python_packages') + if python_packages_dir not in sys.path: + sys.path.append(python_packages_dir) + if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 0ca5b07..e84ef35 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -1,8 +1,10 @@ 
import os import json import shutil +import subprocess import tempfile + from assemblyline.common import forge from assemblyline.common.isotime import epoch_to_iso from assemblyline.odm.models.signature import Signature @@ -52,6 +54,17 @@ def import_parsers(cx: ConfigExtractor): self.log.debug(resp) self.log.debug(source_map) + # Find any requirement files and pip install to a specific directory that will get transferred to services + for root, _, files in os.walk(dir): + for file in files: + if file == "requirements.txt": + err = subprocess.run(['pip', 'install', + '-r', os.path.join(root, file), + '-t', os.path.join(self.latest_updates_dir, 'python_packages')]).stderr + if err: + self.log.error(err) + + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) From 1ac3fda15ea638c31f2384160e78e882922178ae Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 19:27:39 +0000 Subject: [PATCH 14/23] black formatting --- configextractor_/configextractor_.py | 153 ++++++++++++++++++++------- configextractor_/update_server.py | 114 ++++++++++++++------ 2 files changed, 194 insertions(+), 73 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index ca40572..d857ba8 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -1,11 +1,16 @@ -from collections import defaultdict from typing import Any from assemblyline.common import forge from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX -from assemblyline.odm.models.tagging import Tagging from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, ResultTableSection, BODY_FORMAT, TableRow, Heuristic +from assemblyline_v4_service.common.result import ( + Result, + ResultSection, + ResultTableSection, + BODY_FORMAT, + TableRow, + Heuristic, +) import json import hashlib @@ -40,41 +45,54 @@ def _gen_rules_hash(self) -> str: if len(all_sha256s) == 1: return all_sha256s[0][:7] - return hashlib.sha256(' '.join(sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7] + return hashlib.sha256( + " ".join(sorted(all_sha256s)).encode("utf-8") + ).hexdigest()[:7] def _load_rules(self) -> None: if self.rules_list: self.log.debug(self.rules_list) blocklist = [] - blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') - self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) - python_packages_dir = os.path.join(self.rules_directory, 'python_packages') + blocklist_location = os.path.join(self.rules_directory, "blocked_parsers") + self.source_map = json.loads( + open(os.path.join(self.rules_directory, "source_mapping.json")).read() + ) + python_packages_dir = os.path.join(self.rules_directory, "python_packages") if python_packages_dir not in sys.path: sys.path.append(python_packages_dir) if os.path.exists(blocklist_location): - for line in open(blocklist_location, 'r').readlines(): - _, source, _, parser_name = line.split('_', 3) + for line in open(blocklist_location, "r").readlines(): + _, source, _, parser_name = line.split("_", 3) blocklist.append(rf".*{parser_name}$") - self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') - self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) + self.log.info( + f"Blocking the following parsers 
matching these patterns: {blocklist}" + ) + self.cx = CX( + parsers_dirs=self.rules_list, + logger=self.log, + parser_blocklist=blocklist, + ) if not self.cx: - raise Exception("Unable to start ConfigExtractor because can't find directory containing parsers") + raise Exception( + "Unable to start ConfigExtractor because can't find directory containing parsers" + ) if not self.cx.parsers: raise Exception( - f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") + f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}" + ) # Temporary tagging method until CAPE is switched over to MACO modelling def tag_output(self, output: Any, tags: dict = {}): def tag_string(value): if regex.search(IP_ONLY_REGEX, value): - tags['network.static.ip'].append(value) + tags["network.static.ip"].append(value) elif regex.search(DOMAIN_ONLY_REGEX, value): - tags['network.static.domain'].append(value) + tags["network.static.domain"].append(value) elif regex.search(FULL_URI, value): - tags['network.static.uri'].append(value) + tags["network.static.uri"].append(value) if isinstance(output, dict): # Iterate over values of dictionary for value in output.values(): @@ -93,27 +111,40 @@ def network_ioc_section(self, config) -> ResultSection: network_section = ResultSection("Network IOCs") network_fields = { - 'ftp': ExtractorModel.FTP, - 'smtp': ExtractorModel.SMTP, - 'http': ExtractorModel.Http, - 'ssh': ExtractorModel.SSH, - 'proxy': ExtractorModel.Proxy, - 'dns': ExtractorModel.DNS, - 'tcp': ExtractorModel.Connection, - 'udp': ExtractorModel.Connection + "ftp": ExtractorModel.FTP, + "smtp": ExtractorModel.SMTP, + "http": ExtractorModel.Http, + "ssh": ExtractorModel.SSH, + "proxy": ExtractorModel.Proxy, + "dns": ExtractorModel.DNS, + "tcp": ExtractorModel.Connection, + "udp": ExtractorModel.Connection, } for field, model in network_fields.items(): sorted_network_config = {} - for network_config in config.get(field, []): - sorted_network_config.setdefault(network_config.get('usage', 'other'), []).append(network_config) + for network_config in config.pop(field, []): + sorted_network_config.setdefault( + network_config.get("usage", "other"), [] + ).append(network_config) if sorted_network_config: - connection_section = ResultSection(field.upper(), parent=network_section) + connection_section = ResultSection( + field.upper(), parent=network_section + ) for usage, connections in sorted_network_config.items(): tags = list() - self.tag_output(connections, tags) - table_section = ResultTableSection(title_text=f"Usage: {usage.upper()} x{len(connections)}", parent=connection_section, heuristic=Heuristic(2, signature=usage), tags=tags) - [table_section.add_row(TableRow(**model(**c).dict())) for c in connections] + if usage not in ["decoy"]: + self.tag_output(connections, tags) + heuristic = Heuristic(2, signature=usage) + table_section = ResultTableSection( + title_text=f"Usage: {usage.upper()} x{len(connections)}", + parent=connection_section, + heuristic=heuristic, + tags=tags, + ) + for c in connections: + c.pop("usage") + table_section.add_row(TableRow(**model(**c).dict())) if network_section.subsections: return network_section @@ -128,20 +159,62 @@ def execute(self, request): result = Result() config_result = self.cx.run_parsers(request.file_path) if not config_result: request.result = result return a = tempfile.NamedTemporaryFile(delete=False) a.write(json.dumps(config_result).encode()) a.seek(0) - request.add_supplementary(a.name, f"{request.sha256}_malware_config.json", "Raw output from configextractor-py") + request.add_supplementary( + a.name, +
f"{request.sha256}_malware_config.json", + "Raw output from configextractor-py", + ) for parser_framework, parser_results in config_result.items(): - framework_section = ResultSection(parser_framework, parent=result, auto_collapse=True) + framework_section = ResultSection( + parser_framework, parent=result, auto_collapse=True + ) for parser_name, parser_output in parser_results.items(): - config = parser_output.pop('config') - parser_output['family'] = config.pop('family') - id = f'{parser_framework}_{parser_name}' - id_details = self.source_map[id] - parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), - parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE, - tags={'file.rule.configextractor': f"{id_details['source_name']}.{parser_name}"}, classification=id_details['classification']) + # Get AL-specific details about the parser + id = f"{parser_framework}_{parser_name}" + classification = self.source_map[id]["classification"] + source_name = self.source_map[id]["source_name"] + + config = parser_output.pop("config") + + parser_output["family"] = config.pop("family") + parser_output["version"] = config.pop("version") + + tags = { + "file.rule.configextractor": [f"{source_name}.{parser_name}"], + "attribution.family": [parser_output["family"]], + } + attack_ids = config.pop("attack", []) + if config.get("category"): + category = config.pop("category") + parser_output["category"] = category + + if config.get("password"): + password = config.pop("password", []) + parser_output["password"] = password + tags.update({"info.password": password}) + + if config.get("campaign_id"): + campaign_id = config.pop("campaign_id", []) + parser_output["Campaigh ID"] = campaign_id + tags.update({"attribution.campaign": campaign_id}) + + parser_section = ResultSection( + title_text=parser_name, + body=json.dumps(parser_output), + parent=framework_section, + body_format=BODY_FORMAT.KEY_VALUE, + tags=tags, + heuristic=Heuristic(1, attack_ids=attack_ids), + classification=classification, + ) network_section = self.network_ioc_section(config) if network_section: parser_section.add_subsection(network_section) - + ResultSection( + "Other data", + body=json.dumps(config), + body_format=BODY_FORMAT.JSON, + parent=parser_section, + ) request.result = result diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index e84ef35..3858e58 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -9,17 +9,29 @@ from assemblyline.common.isotime import epoch_to_iso from assemblyline.odm.models.signature import Signature from assemblyline_client import get_client -from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER +from assemblyline_v4_service.updater.updater import ( + ServiceUpdater, + temporary_api_key, + UPDATER_DIR, + UI_SERVER, +) from configextractor.main import ConfigExtractor classification = forge.get_classification() + class CXUpdateServer(ServiceUpdater): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def import_update(self, files_sha256, client, source_name, default_classification=classification.UNRESTRICTED): + def import_update( + self, + files_sha256, + client, + source_name, + default_classification=classification.UNRESTRICTED, + ): def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() @@ -29,18 +41,31 @@ def import_parsers(cx: ConfigExtractor): parser_details = 
cx.get_details(parser_path) if parser_details: id = f"{parser_details['framework']}_{parser_details['name']}" - classification = parser_details['classification'] or default_classification - source_map[id] = dict(classification=classification, source_name=source_name) - upload_list.append(Signature(dict( - classification=classification, - data=open(parser_path, 'r').read(), - name=parser_details['name'], - signature_id=id, - source=source_name, - type='configextractor', - status="DEPLOYED", - )).as_primitives()) - return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False), source_map + classification = ( + parser_details["classification"] or default_classification + ) + source_map[id] = dict( + classification=classification, source_name=source_name + ) + upload_list.append( + Signature( + dict( + classification=classification, + data=open(parser_path, "r").read(), + name=parser_details["name"], + signature_id=id, + source=source_name, + type="configextractor", + status="DEPLOYED", + ) + ).as_primitives() + ) + return ( + client.signature.add_update_many( + source_name, "configextractor", upload_list, dedup_name=False + ), + source_map, + ) for dir, _ in files_sha256: # Remove cached duplicates @@ -50,7 +75,9 @@ def import_parsers(cx: ConfigExtractor): if cx.parsers: self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") resp, source_map = import_parsers(cx) - self.log.info(f"Sucessfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.info( + f"Sucessfully added {resp['success']} parsers from source {source_name} to Assemblyline." + ) self.log.debug(resp) self.log.debug(source_map) @@ -58,29 +85,40 @@ def import_parsers(cx: ConfigExtractor): for root, _, files in os.walk(dir): for file in files: if file == "requirements.txt": - err = subprocess.run(['pip', 'install', - '-r', os.path.join(root, file), - '-t', os.path.join(self.latest_updates_dir, 'python_packages')]).stderr + err = subprocess.run( + [ + "pip", + "install", + "-r", + os.path.join(root, file), + "-t", + os.path.join( + self.latest_updates_dir, "python_packages" + ), + ], + capture_output=True, + ).stderr if err: self.log.error(err) - # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) - source_mapping_file = os.path.join(self.latest_updates_dir, 'source_mapping.json') + source_mapping_file = os.path.join( + self.latest_updates_dir, "source_mapping.json" + ) # Removing old version of directory if exists if os.path.exists(destination): shutil.rmtree(destination) shutil.move(dir, destination) if os.path.exists(source_mapping_file): - _tmp = json.loads(open(source_mapping_file, 'r').read()) + _tmp = json.loads(open(source_mapping_file, "r").read()) _tmp.update(source_map) source_map = _tmp - open(source_mapping_file, 'w').write(json.dumps(source_map)) + open(source_mapping_file, "w").write(json.dumps(source_map)) except shutil.Error as e: - if 'already exists' in str(e): + if "already exists" in str(e): continue raise e @@ -98,22 +136,32 @@ def do_local_update(self) -> None: # Check if new signatures have been added self.log.info("Check for new signatures.") - if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', - sig_type=self.updater_type)['update_available']: - _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) + if al_client.signature.update_available( + 
+            since=epoch_to_iso(old_update_time) or "", sig_type=self.updater_type
+        )["update_available"]:
+            _, time_keeper = tempfile.mkstemp(
+                prefix="time_keeper_", dir=UPDATER_DIR
+            )
             self.log.info("An update is available for download from the datastore")
-            self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}")
+            self.log.debug(
+                f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}"
+            )
 
             blocklisted_parsers = list()
-            [blocklisted_parsers.extend(list(item.values()))
-             for item in al_client.search.signature(f'type:{self.updater_type} AND status:DISABLED',
-                                                    fl='id')['items']]
-            self.log.debug(f'Blocking the following parsers: {blocklisted_parsers}')
+            [
+                blocklisted_parsers.extend(list(item.values()))
+                for item in al_client.search.signature(
+                    f"type:{self.updater_type} AND status:DISABLED", fl="id"
+                )["items"]
+            ]
+            self.log.debug(f"Blocking the following parsers: {blocklisted_parsers}")
             output_directory = self.prepare_output_directory()
-            open(os.path.join(output_directory, 'blocked_parsers'), 'w').write('\n'.join(blocklisted_parsers))
+            open(os.path.join(output_directory, "blocked_parsers"), "w").write(
+                "\n".join(blocklisted_parsers)
+            )
             self.serve_directory(output_directory, time_keeper)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     with CXUpdateServer() as server:
         server.serve_forever()

From 93560b8b5f01dcf3e67a97eb9284ecc0bc0c96b9 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Wed, 17 Aug 2022 19:43:29 +0000
Subject: [PATCH 15/23] version fix

---
 configextractor_/configextractor_.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index d857ba8..07630c3 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -184,9 +184,9 @@ def execute(self, request):
                     "attribution.family": [parser_output["family"]],
                 }
                 attack_ids = config.pop("attack", [])
-                if config.get("category"):
-                    category = config.pop("category")
-                    parser_output["category"] = category
+                for field in ["category", "version"]:
+                    if config.get(field):
+                        parser_output[field] = config.pop(field)
 
                 if config.get("password"):
                     password = config.pop("password", [])

From b1ebc5b93c3219a2e0ebcc634888dd5309acb794 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Thu, 18 Aug 2022 17:43:30 +0000
Subject: [PATCH 16/23] Cleanup nesting; fix pop bug

---
 configextractor_/configextractor_.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 07630c3..db7731f 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -143,7 +143,7 @@ def network_ioc_section(self, config) -> ResultSection:
                         tags=tags,
                     )
                     for c in connections:
-                        c.pop("usage")
+                        c.pop("usage", None)
                         table_section.add_row(TableRow(**model(**c).dict()))
 
         if network_section.subsections:
@@ -165,9 +165,6 @@ def execute(self, request):
                 "Raw output from configextractor-py",
             )
         for parser_framework, parser_results in config_result.items():
-            framework_section = ResultSection(
-                parser_framework, parent=result, auto_collapse=True
-            )
             for parser_name, parser_output in parser_results.items():
                 # Get AL-specific details about the parser
                 id = f"{parser_framework}_{parser_name}"
@@ -177,7 +174,7 @@ def execute(self, request):
                 config = parser_output.pop("config")
 
                 parser_output["family"] = config.pop("family")
-                parser_output["version"] = config.pop("version")
+                parser_output["Framework"] = parser_framework
 
                 tags = {
                     "file.rule.configextractor": [f"{source_name}.{parser_name}"],
@@ -201,7 +198,7 @@ def execute(self, request):
                 parser_section = ResultSection(
                     title_text=parser_name,
                     body=json.dumps(parser_output),
-                    parent=framework_section,
+                    parent=result,
                     body_format=BODY_FORMAT.KEY_VALUE,
                     tags=tags,
                     heuristic=Heuristic(1, attack_ids=attack_ids),

From 24b451b60b5f9bfb432c0790edded9d72a03242c Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 14:20:49 +0000
Subject: [PATCH 17/23] Fix blocking parsers

---
 configextractor_/configextractor_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index db7731f..36547cb 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -62,7 +62,7 @@ def _load_rules(self) -> None:
         sys.path.append(python_packages_dir)
 
         if os.path.exists(blocklist_location):
-            for line in open(blocklist_location, "r").readlines():
+            for line in open(blocklist_location, "r").read().splitlines():
                 _, source, _, parser_name = line.split("_", 3)
                 blocklist.append(rf".*{parser_name}$")
             self.log.info(

From 12f9deabfaaedd8614466f77aff3e83c88f9f811 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 16:19:31 +0000
Subject: [PATCH 18/23] define tagging for MACO network models

---
 configextractor_/configextractor_.py |  26 +++---
 configextractor_/maco_tags.py        | 113 +++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 configextractor_/maco_tags.py

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 36547cb..71d244d 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -20,6 +20,7 @@
 import tempfile
 
 from configextractor.main import ConfigExtractor as CX
+from configextractor_.maco_tags import extract_connection_tags, extract_DNS_tags, extract_FTP_tags, extract_HTTP_tags, extract_proxy_tags, extract_SMTP_tags, extract_SSH_tags
 from maco.model import ExtractorModel, ConnUsageEnum
 
 cl_engine = forge.get_classification()
@@ -111,16 +112,16 @@ def network_ioc_section(self, config) -> ResultSection:
         network_section = ResultSection("Network IOCs")
         network_fields = {
-            "ftp": ExtractorModel.FTP,
-            "smtp": ExtractorModel.SMTP,
-            "http": ExtractorModel.Http,
-            "ssh": ExtractorModel.SSH,
-            "proxy": ExtractorModel.Proxy,
-            "dns": ExtractorModel.DNS,
-            "tcp": ExtractorModel.Connection,
-            "udp": ExtractorModel.Connection,
+            "ftp": (ExtractorModel.FTP, extract_FTP_tags),
+            "smtp": (ExtractorModel.SMTP, extract_SMTP_tags),
+            "http": (ExtractorModel.Http, extract_HTTP_tags),
+            "ssh": (ExtractorModel.SSH, extract_SSH_tags),
+            "proxy": (ExtractorModel.Proxy, extract_proxy_tags),
+            "dns": (ExtractorModel.DNS, extract_DNS_tags),
+            "tcp": (ExtractorModel.Connection, extract_connection_tags),
+            "udp": (ExtractorModel.Connection, extract_connection_tags),
         }
-        for field, model in network_fields.items():
+        for field, model_tuple in network_fields.items():
             sorted_network_config = {}
             for network_config in config.pop(field, []):
                 sorted_network_config.setdefault(
@@ -132,9 +133,9 @@ def network_ioc_section(self, config) -> ResultSection:
                     field.upper(), parent=network_section
                 )
                 for usage, connections in sorted_network_config.items():
-                    tags = list()
+                    model, tag_extractor = model_tuple
                     if usage not in ["decoy"]:
-                        self.tag_output(connections, tags)
+                        tags = tag_extractor(connections)
                     heuristic = Heuristic(2, signature=usage)
                     table_section = ResultTableSection(
                         title_text=f"Usage: {usage.upper()} x{len(connections)}",
@@ -207,11 +208,14 @@ def execute(self, request):
                 network_section = self.network_ioc_section(config)
                 if network_section:
                     parser_section.add_subsection(network_section)
+                other_tags = {}
+                self.tag_output(config, other_tags)
                 ResultSection(
                     "Other data",
                     body=json.dumps(config),
                     body_format=BODY_FORMAT.JSON,
                     parent=parser_section,
+                    tags=other_tags
                 )
 
         request.result = result
diff --git a/configextractor_/maco_tags.py b/configextractor_/maco_tags.py
new file mode 100644
index 0000000..44477a0
--- /dev/null
+++ b/configextractor_/maco_tags.py
@@ -0,0 +1,113 @@
+# Documents how Model objects in the MACO standard translate to Assemblyline tags
+
+from typing import Dict, List
+
+
+def extract_FTP_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['FTP']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('path'):
+            tags.setdefault('file.path', []).append(d['path'])
+
+    return tags
+
+
+def extract_SMTP_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['SMTP']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('mail_to'):
+            tags.setdefault('network.email.address', []).extend(d['mail_to'])
+        if d.get('mail_from'):
+            tags.setdefault('network.email.address', []).append(d['mail_from'])
+        if d.get('subject'):
+            tags.setdefault('network.email.subject', []).append(d['subject'])
+
+    return tags
+
+
+def extract_HTTP_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        tags.setdefault('network.protocol', []).append(d.get('protocol', 'HTTP').upper())
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('uri'):
+            tags.setdefault('network.static.uri', []).extend(d['uri'])
+        if d.get('path'):
+            tags.setdefault('network.static.uri_path', []).extend(d['path'])
+        if d.get('user_agent'):
+            tags.setdefault('network.user_agent', []).append(d['user_agent'])
+
+    return tags
+
+
+def extract_SSH_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['SSH']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_proxy_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        if d.get('protocol'):
+            tags.setdefault('network.protocol', []).append(d['protocol'])
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_DNS_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        if d.get('ip'):
+            tags.setdefault('network.static.ip', []).append(d['ip'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_connection_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        for side in ['client', 'server']:
+            if d.get(f'{side}_ip'):
+                tags.setdefault('network.static.ip', []).append(d[f'{side}_ip'])
+            if d.get(f'{side}_port'):
+                tags.setdefault('network.port', []).append(d[f'{side}_port'])
+            if d.get(f'{side}_domain'):
+                tags.setdefault('network.static.domain', []).append(d[f'{side}_domain'])
+
+    return tags

From b3c74ce6d891389701575e4b3050ba1cd56d1b0e Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 16:45:57 +0000
Subject: [PATCH 19/23] set default for misc. tagging

---
 configextractor_/configextractor_.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 71d244d..6dc6a83 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -89,11 +89,11 @@ def _load_rules(self) -> None:
     def tag_output(self, output: Any, tags: dict = {}):
         def tag_string(value):
             if regex.search(IP_ONLY_REGEX, value):
-                tags["network.static.ip"].append(value)
+                tags.setdefault("network.static.ip", []).append(value)
             elif regex.search(DOMAIN_ONLY_REGEX, value):
-                tags["network.static.domain"].append(value)
+                tags.setdefault("network.static.domain", []).append(value)
             elif regex.search(FULL_URI, value):
-                tags["network.static.uri"].append(value)
+                tags.setdefault("network.static.uri", []).append(value)
 
         if isinstance(output, dict):
             # Iterate over values of dictionary

From 4ddc335547f1e5dbf30285794868e9945c2f35ae Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Mon, 22 Aug 2022 15:09:46 +0000
Subject: [PATCH 20/23] don't display 'other' network IOCs

---
 configextractor_/configextractor_.py |  2 +-
 configextractor_/update_server.py    | 56 ++++++++--------------------
 2 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 6dc6a83..aeae4f9 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -134,7 +134,7 @@ def network_ioc_section(self, config) -> ResultSection:
                 )
                 for usage, connections in sorted_network_config.items():
                     model, tag_extractor = model_tuple
-                    if usage not in ["decoy"]:
+                    if usage not in ["decoy", "other"]:
                         tags = tag_extractor(connections)
                     heuristic = Heuristic(2, signature=usage)
                     table_section = ResultTableSection(
diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py
index 3858e58..a93f87e 100644
--- a/configextractor_/update_server.py
+++ b/configextractor_/update_server.py
@@ -9,12 +9,8 @@
 from assemblyline.common.isotime import epoch_to_iso
 from assemblyline.odm.models.signature import Signature
 from assemblyline_client import get_client
-from assemblyline_v4_service.updater.updater import (
-    ServiceUpdater,
-    temporary_api_key,
-    UPDATER_DIR,
-    UI_SERVER,
-)
+from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER
+
 
 from configextractor.main import ConfigExtractor
 
@@ -37,12 +33,8 @@ def import_parsers(cx: ConfigExtractor):
             parser_details = cx.get_details(parser_path)
             if parser_details:
                 id = f"{parser_details['framework']}_{parser_details['name']}"
-                classification = (
-                    parser_details["classification"] or default_classification
-                )
-                source_map[id] = dict(
-                    classification=classification, source_name=source_name
-                )
+                classification = parser_details["classification"] or default_classification
+                source_map[id] = dict(classification=classification, source_name=source_name)
                 upload_list.append(
                     Signature(
                         dict(
                             classification=classification,
                             data=open(parser_path, "r").read(),
                             name=parser_details["name"],
                             signature_id=id,
                             source=source_name,
                             type="configextractor",
                             status="DEPLOYED",
                         )
                     ).as_primitives()
                 )
             return (
-                client.signature.add_update_many(
-                    source_name, "configextractor", upload_list, dedup_name=False
-                ),
+                client.signature.add_update_many(source_name, "configextractor", upload_list, dedup_name=False),
                 source_map,
             )
@@ -75,9 +65,7 @@ def import_parsers(cx: ConfigExtractor):
             if cx.parsers:
                 self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}")
                 resp, source_map = import_parsers(cx)
-                self.log.info(
-                    f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline."
-                )
+                self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.")
                 self.log.debug(resp)
                 self.log.debug(source_map)
@@ -87,14 +75,9 @@ def import_parsers(cx: ConfigExtractor):
                     if file == "requirements.txt":
                         err = subprocess.run(
                             [
-                                "pip",
-                                "install",
-                                "-r",
-                                os.path.join(root, file),
-                                "-t",
-                                os.path.join(
-                                    self.latest_updates_dir, "python_packages"
-                                ),
+                                "pip", "install",
+                                "-r", os.path.join(root, file),
+                                "-t", os.path.join(self.latest_updates_dir, "python_packages"),
                             ],
                             capture_output=True,
                         ).stderr
                         if err:
                             self.log.error(err)
             # Save a local copy of the directory that may potentially contain dependency libraries for the parsers
             try:
                 destination = os.path.join(self.latest_updates_dir, source_name)
-                source_mapping_file = os.path.join(
-                    self.latest_updates_dir, "source_mapping.json"
-                )
+                source_mapping_file = os.path.join(self.latest_updates_dir, "source_mapping.json")
                 # Removing old version of directory if exists
                 if os.path.exists(destination):
                     shutil.rmtree(destination)
@@ -117,22 +98,18 @@ def do_local_update(self) -> None:
 
         # Check if new signatures have been added
         self.log.info("Check for new signatures.")
-        if al_client.signature.update_available(
-            since=epoch_to_iso(old_update_time) or "", sig_type=self.updater_type
-        )["update_available"]:
+        if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or "",
+                                                sig_type=self.updater_type)["update_available"]:
             _, time_keeper = tempfile.mkstemp(
                 prefix="time_keeper_", dir=UPDATER_DIR
             )
             self.log.info("An update is available for download from the datastore")
             self.log.debug(
                 f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}"
             )
 
             blocklisted_parsers = list()
-            [
-                blocklisted_parsers.extend(list(item.values()))
-                for item in al_client.search.signature(
-                    f"type:{self.updater_type} AND status:DISABLED", fl="id"
-                )["items"]
-            ]
+            [blocklisted_parsers.extend(list(item.values())) for item in
+             al_client.search.signature(f"type:{self.updater_type} AND status:DISABLED", fl="id")["items"]]
             self.log.debug(f"Blocking the following parsers: {blocklisted_parsers}")
             output_directory = self.prepare_output_directory()
-            open(os.path.join(output_directory, "blocked_parsers"), "w").write(
-                "\n".join(blocklisted_parsers)
-            )
+            open(os.path.join(output_directory, "blocked_parsers"), "w").write("\n".join(blocklisted_parsers))
             self.serve_directory(output_directory, time_keeper)

From f96461a7147077633e0ed8cd0449fe1938474f8e Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 11:16:26 +0000
Subject: [PATCH 21/23] Attach ontology

---
 configextractor_/configextractor_.py | 24 ++++++++++++++++++++++--
 configextractor_/maco_tags.py        |  4 ++--
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index aeae4f9..64d35c8 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -1,7 +1,8 @@
 from typing import Any
 
-from assemblyline.common import forge
+from assemblyline.common import forge, attack_map
 from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX
+from assemblyline.odm.models.ontology.results import MalwareConfig
 from assemblyline_v4_service.common.base import ServiceBase
 from assemblyline_v4_service.common.result import (
     Result,
@@ -150,6 +151,20 @@ def network_ioc_section(self, config) -> ResultSection:
         if network_section.subsections:
             return network_section
 
+    def attach_ontology(self, config: dict):
+        def strip_null(d: dict):
+            clean_config = {}
+            for k, v in d.items():
+                if v:
+                    if isinstance(v, dict):
+                        clean_config[k] = strip_null(v)
+                    elif isinstance(v, list) and isinstance(v[0], dict):
+                        clean_config[k] = [strip_null(vi) for vi in v]
+                    else:
+                        clean_config[k] = v
+            return clean_config
+        self.ontology.add_result_part(MalwareConfig, strip_null(config))
+
     def execute(self, request):
         result = Result()
         config_result = self.cx.run_parsers(request.file_path)
@@ -171,9 +186,14 @@ def execute(self, request):
                 id = f"{parser_framework}_{parser_name}"
                 classification = self.source_map[id]["classification"]
                 source_name = self.source_map[id]["source_name"]
-                config = parser_output.pop("config")
+                config = parser_output.pop("config")
+
+                # Correct revoked ATT&CK IDs
+                for i, v in enumerate(config.get('attack', [])):
+                    config['attack'][i] = attack_map.revoke_map.get(v, v)
+
+                self.attach_ontology(config)
 
                 parser_output["family"] = config.pop("family")
                 parser_output["Framework"] = parser_framework
diff --git a/configextractor_/maco_tags.py b/configextractor_/maco_tags.py
index 44477a0..df38f08 100644
--- a/configextractor_/maco_tags.py
+++ b/configextractor_/maco_tags.py
@@ -51,9 +51,9 @@ def extract_HTTP_tags(data: List[Dict]) -> Dict:
             tags.setdefault('network.port', []).append(d['port'])
 
         if d.get('uri'):
-            tags.setdefault('network.static.uri', []).extend(d['uri'])
+            tags.setdefault('network.static.uri', []).append(d['uri'])
         if d.get('path'):
-            tags.setdefault('network.static.uri_path', []).extend(d['path'])
+            tags.setdefault('network.static.uri_path', []).append(d['path'])
         if d.get('user_agent'):
             tags.setdefault('network.user_agent', []).append(d['user_agent'])

From 618cafbf5b61f1cd63446c590d21f3742e686295 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 18:46:39 +0000
Subject: [PATCH 22/23] update documentation

---
 README.md | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f554b69..a2b887d 100755
--- a/README.md
+++ b/README.md
@@ -1,4 +1,25 @@
 # ConfigExtractor Service
-**NOTE** : All malware parsers, yara rules etc used in this service are open-source and available in the [ConfigExtractor](https://github.com/CybercentreCanada/configextractor-py) Python library.
+This Assemblyline service extracts malware configurations (such as IP, URL and domain) for various malware families by leveraging the [ConfigExtractor Python library](https://github.com/CybercentreCanada/configextractor-py) for analysis.
 
-This Assemblyline service extracts malware configurations (such as IP, URL and domain) for various malware family by leveraging the ConfigExtractor Python library for analysis.
+
+## Updater
+
+### Sources
+The updater for this service expects source match patterns that select the directories containing parsers.
+
+For example, the CAPE source uses a match pattern of `.*/modules/processing/parsers/CAPE/$`, which targets only the parsers in that directory.
+
+### Persistence
+The updater assumes that you have attached a storage volume to store your collection of sources. Unlike other services' updaters, this one relies on that storage volume rather than Assemblyline's datastore to maintain persistence.
+
+### Python Packages
+The updater scans the parser directories for `requirements.txt` files and installs the listed Python packages into a directory that is passed on to service instances.
+
+## [ConfigExtractor Python Library](https://github.com/CybercentreCanada/configextractor-py)
+
+Any parser directory that works with this library should also be compatible with the service.
+
+At the time of writing, we officially support the following frameworks:
+ - [MWCP](https://github.com/dod-cyber-crime-center/DC3-MWCP)
+ - [CAPE w/ MACO output](https://github.com/kevoreilly/CAPEv2)
+ - [MACO](https://github.com/CybercentreCanada/Maco)

From d776d75d9a1132433e307293c24245f38ccce4e5 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 18:49:48 +0000
Subject: [PATCH 23/23] remove old tests

---
 pipelines/azure-build.yaml                    |   3 -
 pipelines/azure-tests.yaml                    |  71 ---
 test/__init__.py                              |   0
 test/requirements.txt                         |   3 -
 ...7d549fd8fec2a894dd15310053b0b8078064a5754b |   1 -
 test/test_configextractor.py                  | 475 ------------------
 6 files changed, 553 deletions(-)
 delete mode 100644 pipelines/azure-tests.yaml
 delete mode 100644 test/__init__.py
 delete mode 100644 test/requirements.txt
 delete mode 100644 test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
 delete mode 100644 test/test_configextractor.py

diff --git a/pipelines/azure-build.yaml b/pipelines/azure-build.yaml
index 0ad07be..cb44a9a 100644
--- a/pipelines/azure-build.yaml
+++ b/pipelines/azure-build.yaml
@@ -27,9 +27,6 @@ stages:
           if [[ "$TAG" == *stable* ]]; then export BUILD_TYPE=stable; else export BUILD_TYPE=latest; fi
           docker build --build-arg version=$TAG --build-arg branch=$BUILD_TYPE -t cccs/assemblyline-service-configextractor:$TAG -t cccs/assemblyline-service-configextractor:$BUILD_TYPE -f ./Dockerfile .
        displayName: Build containers
-      # - script: |
-      #     docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest'
-      #   displayName: Test containers
       - script: |
           docker push cccs/assemblyline-service-configextractor --all-tags
         displayName: Deploy to Docker Hub
diff --git a/pipelines/azure-tests.yaml b/pipelines/azure-tests.yaml
deleted file mode 100644
index 68ef7a8..0000000
--- a/pipelines/azure-tests.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: tests
-
-trigger: ["*"]
-pr: ["*"]
-
-pool:
-  vmImage: "ubuntu-20.04"
-
-jobs:
-  - job: run_test
-    strategy:
-      matrix:
-        python3_7:
-          python.version: "3.7"
-        Python3_8:
-          python.version: "3.8"
-
-    timeoutInMinutes: 10
-
-    steps:
-      - task: UsePythonVersion@0
-        displayName: Set python version
-        inputs:
-          versionSpec: "$(python.version)"
-      - script: |
-          install_path=`pwd`
-          echo "Setup YARA"
-          YARA_VERSION=4.1.0
-          sudo apt-get update
-          sudo apt-get install -y libfuzzy-dev git libssl1.1 libmagic1 libssl-dev libmagic-dev automake libtool make gcc wget git
-          sudo rm -rf /var/lib/apt/lists/*
-          wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v$YARA_VERSION.tar.gz
-          tar -zxf /tmp/yara.tar.gz -C /tmp
-          cd /tmp/yara-$YARA_VERSION
-          ./bootstrap.sh
-          ./configure --enable-cuckoo --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install
-          make
-          make install
-          sudo cp -r /tmp/yara_install /usr/local
-          cd $install_path
-
-          echo "Install Python packages"
-          sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir assemblyline assemblyline_v4_service magic-yara-python gitpython plyara pyparsing==2.3.0
-          sudo env "PATH=$PATH" python -m pip install -U -r `pwd`/test/requirements.txt
-
-          echo "Install ConfigExtractor"
-          git clone --recurse-submodules https://github.com/CybercentreCanada/configextractor-py.git /tmp/configextractor-py
-          sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir /tmp/configextractor-py/RATDecoders/ /tmp/configextractor-py/
-          sudo mkdir /opt/al_service/
-
-          echo "Cloning CAPE parsers and patch in library"
-          git clone https://github.com/kevoreilly/CAPEv2.git /tmp/CAPEv2
-          sudo rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/*.py_disabled
-          sudo rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/test_cape.py
-
-          echo "Copying library and parsers only"
-          sudo mkdir -p /opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          sudo cp -r /tmp/CAPEv2/modules/processing/parsers/CAPE/* /opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          sudo mkdir -p /opt/al_service/CAPEv2/lib
-          sudo cp -r /tmp/CAPEv2/lib/* /opt/al_service/CAPEv2/lib/
-
-          sudo
-          CAPE_PARSERS_DIR=/opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          PYTHONPATH=$PYTHONPATH:/opt/al_service/CAPEv2/
-          sudo mv /tmp/configextractor-py/dependencies /opt/al_service/dependencies
-          sudo rm -rf /tmp/* /var/lib/apt/lists/* ~/.cache/pip /tmp/configextractor-py/
-        displayName: Setup environment
-      - script: python -m pytest --durations=10 -rsx -vv --cov-report=xml --cov=configextractor
-        displayName: Test
-      - script: python -m codecov
-        displayName: Upload Coverage
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/test/requirements.txt b/test/requirements.txt
deleted file mode 100644
index 76e4fb4..0000000
--- a/test/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-pytest
-pytest-cov
-codecov
\ No newline at end of file
diff --git a/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b b/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
deleted file mode 100644
index e9ea42a..0000000
--- a/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
+++ /dev/null
@@ -1 +0,0 @@
-this is a text file
diff --git a/test/test_configextractor.py b/test/test_configextractor.py
deleted file mode 100644
index fbf885d..0000000
--- a/test/test_configextractor.py
+++ /dev/null
@@ -1,475 +0,0 @@
-import os
-import json
-import pytest
-import shutil
-import yaml
-from configextractor import cli
-cli.ROOT_DIR = '/opt/al_service/dependencies'
-cli.init_root_dependencies()
-cli.load_parsers()
-
-# Getting absolute paths, names and regexes
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(TEST_DIR)
-SERVICE_CONFIG_NAME = "service_manifest.yml"
-SERVICE_CONFIG_PATH = os.path.join(ROOT_DIR, SERVICE_CONFIG_NAME)
-TEMP_SERVICE_CONFIG_PATH = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-
-# Samples that we will be sending to the service
-samples = [dict(
-    sid=1,
-    metadata={},
-    service_name='configextractor',
-    service_config={'use_cape': True},
-    fileinfo=dict(
-        magic='ASCII text, with no line terminators',
-        md5='fda4e701258ba56f465e3636e60d36ec',
-        mime='text/plain',
-        sha1='af2c2618032c679333bebf745e75f9088748d737',
-        sha256='c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b',
-        size=19,
-        type='unknown',
-    ),
-    filename='c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b',
-    min_classification='TLP:WHITE',
-    max_files=501,  # TODO: get the actual value
-    ttl=3600,
-),
-]
-
-
-def create_tmp_manifest():
-    temp_service_config_path = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-    if not os.path.exists(temp_service_config_path):
-        # Placing the service_manifest.yml in the tmp directory
-        shutil.copyfile(SERVICE_CONFIG_PATH, temp_service_config_path)
-
-
-def remove_tmp_manifest():
-    temp_service_config_path = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-    if os.path.exists(temp_service_config_path):
-        os.remove(temp_service_config_path)
-
-
-def return_result_section_class():
-    create_tmp_manifest()
-    from assemblyline_v4_service.common.result import ResultSection
-    remove_tmp_manifest()
-    return ResultSection
-
-
-@pytest.fixture
-def class_instance():
-    create_tmp_manifest()
-    try:
-        from configextractor_ import ConfigExtractor
-        yield ConfigExtractor()
-    finally:
-        remove_tmp_manifest()
-
-
-@pytest.fixture
-def parsers():
-    from assemblyline.odm.models.tagging import Tagging
-    correct_yara_externals = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()}
-    correct_yara_externals['al_file_rule_yara'] = ""
-    return cli.compile(correct_yara_externals)
-
-
-def get_section_builder_inputs() -> list:
-    possible_inputs_for_section_builder = []
-    parser_names = yaml.safe_load(open(cli.YARA_PARSER_PATH, 'r').read()).keys()
-    parser_types = ["MWCP", "RATDecoder"]
-    field_dict = {
-        "address": ['999'],
-        "other": {
-            "a": "b"
-        },
-        "not_in_field_map": True
-    }
-    for parser_name in parser_names:
-        for parser_type in parser_types:
-            possible_inputs_for_section_builder.append((parser_name, field_dict, parser_type))
-    return possible_inputs_for_section_builder
-
-
-def get_classification_checker_inputs() -> list:
-    ResultSection = return_result_section_class()
-
-    data_for_result_sections = get_section_builder_inputs()
-    possible_inputs_for_classification_checker = []
-    for parser_name, field_dict, parser_type in data_for_result_sections:
-        res_sec = ResultSection(f"{parser_type} : {parser_name}")
-        possible_inputs_for_classification_checker.append((res_sec, parser_name))
-    return possible_inputs_for_classification_checker
-
-
-def get_subsection_builder_inputs() -> list:
-    ResultSection = return_result_section_class()
-
-    parent_result_section = ResultSection("parent")
-    field_dict = {
-        "address": ["list_sample"],
-        "c2_address": [["nested_list_sample"]],
-        "c2_url": [["nested_list_sample"], "list_sample"]
-    }
-    possible_inputs_for_subsection_builder = [(parent_result_section, field_dict)]
-    return possible_inputs_for_subsection_builder
-
-
-def check_section_equality(this, that) -> bool:
-    # Recursive method to check equality of result section and nested sections
-
-    # Heuristics also need their own equality checks
-    if this.heuristic and that.heuristic:
-        result_heuristic_equality = this.heuristic.attack_ids == that.heuristic.attack_ids and \
-            this.heuristic.frequency == that.heuristic.frequency and \
-            this.heuristic.heur_id == that.heuristic.heur_id and \
-            this.heuristic.score == that.heuristic.score and \
-            this.heuristic.score_map == that.heuristic.score_map and \
-            this.heuristic.signatures == that.heuristic.signatures
-
-    elif not this.heuristic and not that.heuristic:
-        result_heuristic_equality = True
-    else:
-        result_heuristic_equality = False
-
-    # Assuming we are given the "root section" at all times, it is safe to say that we don't need to confirm parent
-    current_section_equality = result_heuristic_equality and \
-        this.body == that.body and \
-        this.body_format == that.body_format and \
-        this.classification == that.classification and \
-        this.depth == that.depth and \
-        len(this.subsections) == len(that.subsections) and \
-        this.title_text == that.title_text
-
-    if not current_section_equality:
-        return False
-
-    for index, subsection in enumerate(this.subsections):
-        subsection_equality = check_section_equality(subsection, that.subsections[index])
-        if not subsection_equality:
-            return False
-
-    return True
-
-
-def check_reporter_equality(this, that) -> bool:
-    # Checks all mwcp.Report attributes except for managed_tempdir
-    reporter_equality = this.errors == that.errors and this.finalized == that.finalized \
-        and this.input_file == that.input_file \
-        and {x: sorted(this.metadata[x]) for x in this.metadata.keys()} == that.metadata \
-        and this.parser == that.parser
-    if not reporter_equality:
-        return reporter_equality
-
-    # Also in the case where a metadata list exists, the order does not matter, so check as such
-    metadata_equality = this.metadata.keys() == that.metadata.keys()
-    if not metadata_equality:
-        return metadata_equality
-
-    for key, value in this.metadata.items():
-        if not metadata_equality:
-            return metadata_equality
-        if type(value) == list:
-            if len(value) != len(that.metadata[key]):
-                return False
-            for item in value:
-                if item not in that.metadata[key]:
-                    return False
-        else:
-            metadata_equality = value == that.metadata[key]
-
-    return reporter_equality and metadata_equality
-
-
-def create_correct_result_section_tree(fields, parsers=None, parser_type=None, parser_name=None):
-    from configextractor_ import FIELD_TAG_MAP, tag_network_ioc
-    from assemblyline_v4_service.common.result import BODY_FORMAT
-    from assemblyline.common import forge
-    cl_engine = forge.get_classification()
-    ResultSection = return_result_section_class()
-    other_key = "other"
-    ratdecoder = "RATDecoder"
-    mwcp = "MWCP"
-    malware_name = ''
-    malware_types = []
-    mitre_group = ''
-    mitre_att = ''
-    category = 'malware'
-    correct_file_parsers = {}
-
-    if parser_type not in [ratdecoder, mwcp] or not parser_name:
-        correct_parent_section = ResultSection("parent")
-    else:
-        [correct_file_parsers.update(p) for p in parsers]
-        correct_parent_section = ResultSection(f"{parser_type} : {parser_name}")
-
-    parser_attributes = {}
-    if parser_type == mwcp:
-        obj = correct_file_parsers[parser_name]
-        for item in ['classification', 'mitre_group', 'mitre_att',
-                     'malware', 'malware_types', 'category']:
-            val = getattr(obj, item, None)
-            if val:
-                parser_attributes[item] = val
-        malware_name = obj.malware
-        malware_types = obj.malware_types
-        mitre_att = obj.mitre_att
-        mitre_group = obj.mitre_group
-        category = obj.category
-    elif parser_type == ratdecoder:
-        malware_name = parser_name
-
-    if correct_file_parsers:
-        parser_classification = correct_file_parsers[parser_name].classification
-        correct_classification = cl_engine.normalize_classification(parser_classification)
-        correct_parent_section.classification = correct_classification
-
-    if fields and parser_type:
-        from configextractor_ import HEURISTICS_MAP
-        correct_parent_section.set_body(json.dumps(parser_attributes), body_format=BODY_FORMAT.KEY_VALUE)
-        correct_parent_section.set_heuristic(HEURISTICS_MAP.get(category, 1), attack_id=mitre_att)
-        correct_parent_section.add_tag("source", parser_type)
-        if malware_name:
-            correct_parent_section.add_tag('attribution.implant', malware_name.upper())
-        if mitre_group:
-            correct_parent_section.add_tag('attribution.actor', mitre_group.upper())
-        for malware_type in malware_types:
-            correct_parent_section.add_tag('attribution.family', malware_type.upper())
-
-        # subsection section
-        for key, value in fields.items():
-            if key in FIELD_TAG_MAP:
-                tag = FIELD_TAG_MAP[key]
-                body = []
-                for field in value:
-                    if type(field) is str:
-                        body.append({key: field})
-                    elif type(field) is list:
-                        body.extend([{key: item} for item in field])
-
-                correct_subsection = ResultSection(
-                    title_text=f"Extracted {key.capitalize()}",
-                    body=json.dumps(body),
-                    body_format=BODY_FORMAT.TABLE,
-                )
-                if 'uri' in tag:
-                    tag_network_ioc(correct_subsection, value)
-                else:
-                    for v in value:
-                        correct_subsection.add_tag(tag, value)
-                correct_parent_section.add_subsection(correct_subsection)
-
-        # Other key section comes after all subsection builder
-        if other_key in fields:
-            other_content = fields[other_key]
-            other_section = ResultSection(
-                title_text=f"Other metadata found",
-                body_format=BODY_FORMAT.KEY_VALUE,
-                body=json.dumps(other_content)
-            )
-            correct_parent_section.add_subsection(other_section)
-    return correct_parent_section
-
-
-def yield_sample_file_paths():
-    samples_path = os.path.join(TEST_DIR, "samples")
-    # For some reason os.listdir lists the same file twice, but with a trailing space on the second entry
-    paths = set([path.rstrip() for path in os.listdir(samples_path)])
-    for sample in paths:
-        yield os.path.join(samples_path, sample)
-
-
-class TestConfigExtractor:
-
-    @classmethod
-    def setup_class(cls):
-        # Placing the samples in the tmp directory
-        samples_path = os.path.join(TEST_DIR, "samples")
-        for sample in os.listdir(samples_path):
-            sample_path = os.path.join(samples_path, sample)
-            shutil.copyfile(sample_path, os.path.join("/tmp", sample))
-
-    @classmethod
-    def teardown_class(cls):
-        # Cleaning up the tmp directory
-        samples_path = os.path.join(TEST_DIR, "samples")
-        for sample in os.listdir(samples_path):
-            temp_sample_path = os.path.join("/tmp", sample)
-            os.remove(temp_sample_path)
-
-    @staticmethod
-    def test_init(class_instance):
-        from configextractor.cli import register
-        assert class_instance.file_parsers == {}
-        assert class_instance.tag_parsers is None
-        assert class_instance.parser_classification == []
-
-    @staticmethod
-    def test_start(class_instance, parsers):
-        correct_file_parsers, correct_tag_parsers = parsers
-        class_instance.start()
-        # Check if indeed the expected file and tag parsers are the actual file and tag parsers
-        assert class_instance.file_parsers == correct_file_parsers
-        assert class_instance.tag_parsers == correct_tag_parsers
-
-    @staticmethod
-    @pytest.mark.parametrize("sample",
-                             samples
-                             )
-    def test_execute(sample, class_instance):
-        # Imports required to execute the sample
-        from assemblyline_v4_service.common.task import Task
-        from assemblyline.odm.messages.task import Task as ServiceTask
-        from assemblyline_v4_service.common.request import ServiceRequest
-
-        # Creating the required objects for execution
-        service_task = ServiceTask(sample)
-        task = Task(service_task)
-        class_instance._task = task
-        service_request = ServiceRequest(task)
-
-        # Actually executing the sample
-        # task.service_config = {}
-        class_instance.execute(service_request)
-
-    @staticmethod
-    @pytest.mark.parametrize("parser,field_dict,parsertype",
-                             get_section_builder_inputs()
-                             )
-    def test_section_builder(parser, field_dict, parsertype, class_instance, parsers):
-        from assemblyline_v4_service.common.result import Result
-        result = Result()
-        correct_parsers = parsers[0] if parser in parsers[0].keys() else parsers[1]
-        correct_sections = create_correct_result_section_tree(field_dict, parsers, parsertype, parser)
-        class_instance.file_parsers = correct_parsers
-        class_instance.section_builder(parser=parser, field_dict=field_dict, result=result, parsertype=parsertype)
-
-        assert check_section_equality(result.sections[0], correct_sections)
-
-    @staticmethod
-    @pytest.mark.parametrize("res_section,parser_name",
-                             get_classification_checker_inputs()
-                             )
-    def test_classification_checker(res_section, parser_name, parsers):
-        from configextractor_ import classification_checker
-        from assemblyline.common import forge
-        cl_engine = forge.get_classification()
-
-        correct_file_parsers = parsers[0] if parser_name in parsers[0].keys() else parsers[1]
-        parser_classification = correct_file_parsers[parser_name].classification
-        correct_classification = cl_engine.normalize_classification(parser_classification)
-
-        # TODO: Note that classification_checker() only needs the parser classification for the passed parser_name,
-        # not all parsers
-        test_res_section = classification_checker(res_section=res_section,
-                                                  parser_name=parser_name, file_parsers=correct_file_parsers)
-        assert test_res_section.classification == correct_classification
-
-    @staticmethod
-    @pytest.mark.parametrize("parent_section,fields",
-                             get_subsection_builder_inputs()
-                             )
-    def test_subsection_builder(parent_section, fields):
-        from configextractor_ import subsection_builder
-        correct_parent_section = create_correct_result_section_tree(fields)
-        subsection_builder(parent_section=parent_section, fields=fields)
-        assert check_section_equality(parent_section, correct_parent_section)
-
-
-def get_parser_entries():
-    import yaml
-    stream = open(cli.YARA_PARSER_PATH, 'r')
-    parser_entries = yaml.full_load(stream)
-    return parser_entries
-
-
-def get_validate_parser_inputs():
-    possible_inputs_for_validate_parser = []
-    parser_entries = get_parser_entries()
-    incorrect_key = "incorrect"
-
-    for parser_entry in parser_entries.values():
-        possible_inputs_for_validate_parser.append(parser_entry["parser"])
-    possible_inputs_for_validate_parser.append([{incorrect_key: [incorrect_key]}])
-    return possible_inputs_for_validate_parser
-
-
-def get_report():
-    import mwcp
-    mwcp.register_entry_points()
-    mwcp.register_parser_directory(cli.MWCP_PARSERS_DIR_PATH)
-    reporter = mwcp.Report()
-    return reporter
-
-
-def add_metadata(data, mwcp_key, correct_report=None):
-    if not correct_report:
-        correct_report = get_report()
-    for val in data.values():
-        correct_report.add_metadata(mwcp_key, val)
-    return correct_report
-
-
-def create_correct_parser_objs(tags=None):
-    import yara
-    from configextractor.cli import check_paths, validate_parsers, Parser
-
-    parser_entries = get_parser_entries()
-    parser_objs = {}
-    for parser_name, parser_details in parser_entries.items():
-        rule_source_paths = []
-        # if tags are present then get tag rule paths
-
-        if tags and 'tag' in parser_details['selector']:
-            rule_source_paths = parser_details['selector']['tag']
-        elif not tags and 'yara_rule' in parser_details['selector']:
-            rule_source_paths = parser_details['selector']['yara_rule']
-        if not check_paths(rule_source_paths):
-            continue
-        validated_parsers = validate_parsers(parser_details['parser'])
-        compiled_rules = []
-        for rule_source_path in rule_source_paths:
-            abs_path = os.path.join(ROOT_DIR, rule_source_path)
-            if tags:
-                rule = yara.compile(filepath=abs_path, externals=tags)
-            else:
-                rule = yara.compile(filepath=abs_path)
-            compiled_rules.append(rule)
-        parser_objs[parser_name] = Parser(
-            name=parser_name,
-            parser_list=validated_parsers,
-            compiled_rules=compiled_rules,
-            classification=parser_details['classification'],
-            malware=parser_details['malware'],
-            malware_types=parser_details['malware_type'],
-            mitre_group=parser_details['mitre_group'],
-            mitre_att=parser_details['mitre_att'],
-            category=parser_details['category'],
-            run_on=parser_details['run_on']
-        )
    return parser_objs
-
-
-def get_tags():
-    from assemblyline.odm.models.tagging import Tagging
-    tags = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()}
-    tags["al_file_rule_yara"] = ""
-    return tags
-
-
-def get_new_tags():
-    request_task_tags = {"a": "b"}
-
-    tags = {f"al_{k.replace('.', '_')}": i for k, i in request_task_tags.items()}
-    newtags = {}
-    # yara externals must be dicts w key value pairs being strings
-    for k, v in tags.items():
-        key = f"al_{k.replace('.', '_')}"
-        for i in range(len(v)):
-            if not isinstance(v[i], str):
-                v[i] = str(v[i])
-        value = " | ".join(v)
-        newtags[key] = value
-    return newtags
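
A note on the MACO config shape that the patches above manipulate (popping "family", "version", "attack", and the per-protocol network lists): the sketch below is illustrative only. It assumes the maco library's pydantic ExtractorModel exposes default-empty network lists and pydantic's .dict(), which is how the service code above uses it; the family name and URI are invented.

from maco.model import ExtractorModel

# Build the kind of config a MACO extractor returns for one sample.
config = ExtractorModel(family="example_family")  # hypothetical family name
config.http.append(
    ExtractorModel.Http(uri="http://c2.example.com/gate.php", usage="c2")
)

# execute() consumes the dict form: "family" feeds the attribution tags, and
# entries under "http" (grouped by their "usage") drive network_ioc_section().
print(config.dict(exclude_none=True))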
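
In the same spirit, a quick sanity check of the maco_tags helpers introduced in PATCH 18, using an invented FTP connection in the dict form that network_ioc_section() passes in:

from configextractor_.maco_tags import extract_FTP_tags

connections = [{"hostname": "ftp.example.com", "port": 2121, "password": "hunter2"}]
print(extract_FTP_tags(connections))
# {'network.protocol': ['FTP'], 'info.password': ['hunter2'],
#  'network.static.domain': ['ftp.example.com'], 'network.port': [2121]}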
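
Lastly, the strip_null helper from PATCH 21 prunes falsy values (recursing into dicts and into lists of dicts) before the config is attached as a MalwareConfig ontology record. A standalone worked example, with invented values:

def strip_null(d: dict) -> dict:
    # Same logic as attach_ontology()'s inner helper: drop falsy values,
    # recursing into nested dicts and lists of dicts.
    clean_config = {}
    for k, v in d.items():
        if v:
            if isinstance(v, dict):
                clean_config[k] = strip_null(v)
            elif isinstance(v, list) and isinstance(v[0], dict):
                clean_config[k] = [strip_null(vi) for vi in v]
            else:
                clean_config[k] = v
    return clean_config

config = {
    "family": "example",
    "version": None,  # dropped: falsy
    "attack": [],     # dropped: an empty list is falsy
    "http": [{"uri": "http://c2.example.com", "port": None}],
}
print(strip_null(config))
# {'family': 'example', 'http': [{'uri': 'http://c2.example.com'}]}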