From b0743c6c350d99c98e366e24bc672dd9b841420c Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:42:30 +0000 Subject: [PATCH 01/23] init revamp --- Dockerfile | 61 ++---- configextractor_.py | 276 --------------------------- configextractor_/configextractor_.py | 81 ++++++++ configextractor_/update_server.py | 87 +++++++++ service_manifest.yml | 42 +++- 5 files changed, 214 insertions(+), 333 deletions(-) delete mode 100755 configextractor_.py create mode 100755 configextractor_/configextractor_.py create mode 100644 configextractor_/update_server.py diff --git a/Dockerfile b/Dockerfile index 12e4bf6..2bf8b55 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch AS base -ENV SERVICE_PATH configextractor_.ConfigExtractor +ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.0 USER assemblyline -RUN pip uninstall -y yara-python +#RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl && rm -rf /var/lib/apt/lists/* @@ -15,60 +15,33 @@ FROM base AS build RUN apt-get update && apt-get install -y git libssl-dev libmagic-dev automake libtool make gcc wget libjansson-dev pkg-config && rm -rf /var/lib/apt/lists/* # Compile and install YARA -RUN wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v$YARA_VERSION.tar.gz +RUN wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v${YARA_VERSION}.tar.gz RUN tar -zxf /tmp/yara.tar.gz -C /tmp -WORKDIR /tmp/yara-$YARA_VERSION +WORKDIR /tmp/yara-${YARA_VERSION} RUN ./bootstrap.sh -RUN ./configure --enable-cuckoo --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install +RUN ./configure --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install RUN make RUN make install + # Build the yara python plugins, install other dependencies USER assemblyline RUN touch /tmp/before-pip -# Get ConfigExtractor library -RUN git clone --recurse-submodules https://github.com/CybercentreCanada/configextractor-py.git /tmp/configextractor-py -RUN pip install --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" yara-python==$YARA_VERSION -RUN pip install --no-cache-dir --user --use-deprecated=legacy-resolver \ - gitpython plyara /tmp/configextractor-py/RATDecoders/ /tmp/configextractor-py/ - -RUN git clone https://github.com/kevoreilly/CAPEv2.git /tmp/CAPEv2 - -# Install packages required for CAPE's cuckoo library -RUN pip install chardet pyzipper \ - && rm -rf ~/.cache/pip - -# Remove disabled/test parsers -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/*.py_disabled -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/test_cape.py -# Remove 'bad' parsers -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/LokiBot.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/GuLoader.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/SquirrelWaffle.py -RUN rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/BuerLoader.py - -RUN mkdir -p /tmp/al_service/CAPEv2/modules/processing/parsers/CAPE/ -RUN cp -r /tmp/CAPEv2/modules/processing/parsers/CAPE/* /tmp/al_service/CAPEv2/modules/processing/parsers/CAPE/ -RUN mkdir -p /tmp/al_service/CAPEv2/lib -RUN cp -r /tmp/CAPEv2/lib/* /tmp/al_service/CAPEv2/lib/ - -RUN rm -rf /tmp/CAPEv2 +# Get ConfigExtractor library +RUN pip install -U git+https://github.com/CybercentreCanada/configextractor-py@revamp +RUN pip install -U 
git+https://github.com/CybercentreCanada/maco -# # Remove files that existed before the pip install so that our copy command below doesn't take a snapshot of -# # files that already exist in the base image -# RUN find /var/lib/assemblyline/.local -type f ! -newer /tmp/before-pip -delete +RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git +RUN pip install --no-cache-dir --user gitpython plyara markupsafe==2.0.1 -# # Switch back to root and change the ownership of the files to be copied due to bitbucket pipeline uid nonsense -# USER root -# RUN chown root:root -R /var/lib/assemblyline/.local +# Public libraries that can be used by parsers +RUN pip install --no-cache-dir --user netstruct beautifulsoup4 pyOpenSSL # Revert back to before the compile FROM base COPY --from=build /tmp/yara_install /usr/local -COPY --from=build /tmp/configextractor-py/dependencies /opt/al_service/dependencies -COPY --from=build /tmp/al_service/CAPEv2/ /opt/al_service/CAPEv2 COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local /var/lib/assemblyline/.local # Create directories @@ -79,10 +52,6 @@ RUN mkdir -p /opt/al_service WORKDIR /opt/al_service COPY . . -# Make sure we actually have the right version of pyparsing by uninstalling it as root -# then later reinstalling an exact version as the user account -RUN pip uninstall --yes pyparsing flask - # Cleanup RUN rm ./Dockerfile @@ -91,11 +60,7 @@ RUN chown -R assemblyline /opt/al_service # Patch version in manifest ARG version=4.0.0.dev1 -ENV PUBLIC_SERVICE_VERSION=$version -ENV CAPE_PARSERS_DIR=/opt/al_service/CAPEv2/modules/processing/parsers/CAPE/ -ENV PYTHONPATH=$PYTHONPATH:/opt/al_service/CAPEv2/ RUN sed -i -e "s/\$SERVICE_TAG/$version/g" service_manifest.yml # Switch to assemblyline user USER assemblyline -RUN pip install --user pyparsing==2.3.0 flask~=1.1.0 diff --git a/configextractor_.py b/configextractor_.py deleted file mode 100755 index 7af5317..0000000 --- a/configextractor_.py +++ /dev/null @@ -1,276 +0,0 @@ -import ast -from configextractor import cli -import json -import tempfile -import os -import re -from mwcp.metadata import File as file_meta_obj - -from assemblyline.common import forge -from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI -from assemblyline.odm.models.tagging import Tagging -from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT -from typing import List -from urllib3.util import parse_url - -cl_engine = forge.get_classification() - -HEURISTICS_MAP = {"malware": 1, "safe": 2} -# This dict contains fields that we care about, and the corresponding tag if they exist -FIELD_TAG_MAP = { - 'address': 'network.dynamic.uri', - 'c2_address': 'network.dynamic.uri', - 'c2_url': 'network.dynamic.uri', - 'credential': None, - 'directory': 'file.path', - 'email_address': None, - 'event': None, - 'filename': 'file.path', - 'filepath': 'file.path', - 'ftp': None, - 'guid': None, - 'injectionprocess': None, - 'interval': None, - 'key': None, - 'listenport': None, - 'missionid': None, - 'mutex': 'dynamic.mutex', - 'outputfile': None, - 'password': 'file.string.extracted', - 'pipe': None, - 'proxy': None, - 'proxy_address': None, - 'registrydata': None, - 'registrypath': 'dynamic.registry_key', - 'registrypathdata': None, - 'rsa_private_key': None, - 'rsa_public_key': None, - 'service': None, - 
'servicedescription': None, - 'servicedisplayname': None, - 'servicedll': None, - 'serviceimage': None, - 'servicename': None, - 'ssl_cert_sha1': None, - 'url': 'network.dynamic.uri', - 'urlpath': None, - 'useragent': None, - 'username': 'file.string.extracted', - 'version': 'file.pe.versions.description' -} - - -class ConfigExtractor(ServiceBase): - def __init__(self, config=None): - super(ConfigExtractor, self).__init__(config) - self.file_parsers = {} - self.tag_parsers = None - self.parser_classification = [] # default should be the classification set for the service. - cli.ROOT_DIR = '/opt/al_service/dependencies/' - cli.init_root_dependencies() - cli.load_parsers() - - def start(self): - yara_externals = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()} - yara_externals.update( - { - "al_file_rule_yara": "" - } - ) - file_parsers, tag_parsers = cli.compile(yara_externals) - self.log.info(f"loaded {list(file_parsers.keys())}") - cli.validate_parser_config() - self.file_parsers = file_parsers - self.tag_parsers = tag_parsers - - def execute(self, request): - mwcp_report = cli.register() - result = Result() - # Run Ratdecoders - output = cli.run_ratdecoders(request.file_path, mwcp_report) - if type(output) is str: - if "error" in output: - self.log.warning(output) - else: - self.log.debug(output) - output = "" - if type(output) is dict: - self.log.debug(output) - for parser, fields in output.items(): - self.section_builder(parser, fields, result, "RATDecoder") - - tags = {f"al_{k.replace('.', '_')}": i for k, i in request.task.tags.items()} - newtags = {} - # yara externals must be dicts w key value pairs being strings - for k, v in tags.items(): - key = f"al_{k.replace('.', '_')}" - for i in range(len(v)): - if not isinstance(v[i], str): - v[i] = str(v[i]) - value = " | ".join(v) - newtags[key] = value - # get matches for both, dedup then run - cli.run_mwcfg(request.file_path, mwcp_report) - if request.get_param('use_cape'): - cli.run_cape(request.file_path, mwcp_report) - - # Handle metadata from mwcp_report, generate section - metadata = mwcp_report.metadata - metadata.pop('debug', []) # Dumped as separate file - metadata.pop('other', []) # Another section displays 'other' data - if metadata: - meta_section = ResultSection('MWCP Metadata', body=json.dumps( - metadata), body_format=BODY_FORMAT.JSON) - tags = dict() - for field, data in metadata.items(): - if FIELD_TAG_MAP.get(field): - if FIELD_TAG_MAP[field].startswith('network'): - tag_network_ioc(meta_section, data) - else: - [meta_section.add_tag(FIELD_TAG_MAP[field], d) for d in data] - - if meta_section.tags: - result.add_section(meta_section) - - parsers = cli.deduplicate(self.file_parsers, self.tag_parsers, request.file_path, newtags) - output_fields, reports = cli.run(parsers, request.file_path) - for parser, field_dict in output_fields.items(): - self.section_builder(parser, field_dict, result) - for report in reports: - for metadata_list in report._metadata.values(): - for meta in metadata_list: - if isinstance(meta, file_meta_obj): - with tempfile.NamedTemporaryFile(dir=self.working_directory, delete=False) as tmp_file: - tmp_file.write(meta.data) - tmp_file.seek(0) - request.add_supplementary(tmp_file.name, f"{meta.md5[:5]}_{meta.name}", meta.description) - fd, temp_path = tempfile.mkstemp(dir=self.working_directory) - if output or output_fields: - with os.fdopen(fd, "w") as myfile: - myfile.write(json.dumps(output)) - myfile.write(json.dumps(output_fields)) - request.add_supplementary(temp_path, 
"output.json", "This is MWCP output as a JSON file") - request.result = result - - def section_builder(self, parser, field_dict, result, parsertype="MWCP"): - json_body = {} - malware_name = '' - malware_types = [] - mitre_group = '' - mitre_att = '' - category = 'malware' - # get malware names from parser objects - if parsertype == "RATDecoder": - malware_name = parser - if parsertype == "MWCP": - for name, obj in self.file_parsers.items(): - if parser in obj.parser_list: - malware_name = obj.malware - malware_types = obj.malware_types if isinstance(obj.malware_types, list) else [obj.malware_types] - mitre_att = obj.mitre_att - mitre_group = obj.mitre_group - category = obj.category - for item in ['classification', 'mitre_group', 'mitre_att', - 'malware', 'malware_types', 'category']: - val = getattr(obj, item, None) - if val: - json_body[item] = val - break - parser_section = ResultSection(f"{parsertype} : {parser}") - - parser_section = classification_checker(parser_section, parser, self.file_parsers) - if len(field_dict) > 0: # if any decoder output exists raise heuristic - parser_section.set_body(json.dumps(json_body), body_format=BODY_FORMAT.KEY_VALUE) - parser_section.set_heuristic(HEURISTICS_MAP.get(category, 1), attack_id=mitre_att) - parser_section.add_tag("source", f"{parsertype}.{parser}") - - if malware_name: - parser_section.add_tag('attribution.implant', malware_name.upper()) - if mitre_group: - parser_section.add_tag('attribution.actor', mitre_group.upper()) - for malware_type in malware_types: - parser_section.add_tag('attribution.family', malware_type.upper()) - # Create subsections and attach them to the main parser_section - subsection_builder(parser_section, field_dict) - - other_key = "other" - if other_key in field_dict: - other_content = field_dict[other_key] - other_section = ResultSection("Other metadata found", body_format=BODY_FORMAT.KEY_VALUE, - body=json.dumps(other_content)) - parser_section.add_subsection(other_section) - - for field in field_dict: - if field != other_key and field not in FIELD_TAG_MAP: - self.log.debug(f"{field} does not exist in FIELD_TAG_MAP") - result.add_section(parser_section) - - -def classification_checker(res_section, parser_name, file_parsers): - for name, parser_obj in file_parsers.items(): - if name == parser_name: - res_section.classification = cl_engine.normalize_classification(parser_obj.classification) - return res_section - - -def subsection_builder(parent_section: ResultSection = None, fields: dict = {}): - for mwcp_field, mwcp_field_data in fields.items(): - if mwcp_field in FIELD_TAG_MAP and mwcp_field_data != ['-']: - tag = FIELD_TAG_MAP[mwcp_field] - table_body = [] - table_section = ResultSection(f"Extracted {mwcp_field.capitalize()}") - - # Make sure data isn't a string representation of a list - for index, data in enumerate(mwcp_field_data): - if isinstance(data, str) and all(symbol in data for symbol in ['[', ']']): - mwcp_field_data.remove(data) - for x in ast.literal_eval(data): - mwcp_field_data.append(x) - - if tag: - # Was a URL/URI tagged? 
- if 'uri' in tag: - tag_network_ioc(table_section, mwcp_field_data) - else: - for x in mwcp_field_data: - table_section.add_tag(tag, x) - # Tag everything that we can - # Add data to section body - for line in mwcp_field_data: - if type(line) is str: - table_body.append({mwcp_field: line}) - elif type(line) is list: - for item in line: - table_body.append({mwcp_field: item}) - table_section.set_body(body_format=BODY_FORMAT.TABLE, body=json.dumps(table_body)) - - parent_section.add_subsection(table_section) - - -def tag_network_ioc(section: ResultSection, dataset: List[str]) -> None: - if not section.heuristic: - # Heuristic should only be applied once - section.set_heuristic(3) - for data in dataset: - # Tests indicated the possibilty of nested lists - main_tag = None - if isinstance(data, list): - tag_network_ioc(section, data) - elif re.match(IP_ONLY_REGEX, data): - main_tag = 'network.dynamic.ip' - elif re.match(FULL_URI, data): - main_tag = 'network.dynamic.uri' - # Deconstruct the raw data to additional tagging - parsed_uri = parse_url(data) - if parsed_uri.host: - # tag_reducer will de-dup IP being tagged twice - host_tag = 'network.dynamic.ip' if re.match( - IP_ONLY_REGEX, parsed_uri.host) else 'network.dynamic.domain' - section.add_tag(host_tag, parsed_uri.host) - if parsed_uri.port: - section.add_tag('network.port', parsed_uri.port) - if parsed_uri.path: - section.add_tag('network.dynamic.uri_path', parsed_uri.path) - if main_tag: - section.add_tag(main_tag, data) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py new file mode 100755 index 0000000..bebe6ed --- /dev/null +++ b/configextractor_/configextractor_.py @@ -0,0 +1,81 @@ +from collections import defaultdict + +from assemblyline.common import forge +from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX +from assemblyline.odm.models.tagging import Tagging +from assemblyline_v4_service.common.base import ServiceBase +from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT + +import json +import hashlib +import os +import regex + +from configextractor.main import ConfigExtractor as CX + + +cl_engine = forge.get_classification() + + +class ConfigExtractor(ServiceBase): + def __init__(self, config=None): + super(ConfigExtractor, self).__init__(config) + self.cx = None + + # Generate the rules_hash and init rules_list based on the raw files in the rules_directory from updater + def _gen_rules_hash(self) -> str: + self.rules_list = [] + for obj in os.listdir(self.rules_directory): + obj_path = os.path.join(self.rules_directory, obj) + if os.path.isdir(obj_path): + self.rules_list.append(obj_path) + all_sha256s = [f for f in self.rules_list] + + if len(all_sha256s) == 1: + return all_sha256s[0][:7] + + return hashlib.sha256(' '.join(sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7] + + def _load_rules(self) -> None: + if self.rules_list: + self.log.debug(self.rules_list) + blocklist = [] + blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') + if os.path.exists(blocklist_location): + for line in open(blocklist_location, 'r').readlines(): + _, source, _, parser_name = line.split('_') + blocklist.append(rf"*{parser_name}$") + self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') + self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) + + if not self.cx: + raise Exception("Unable to start ConfigExtractor because can't find directory containing parsers") 
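+        # Even when parser directories exist, CX can load zero parsers (e.g. if every parser was blocklisted), so the empty parser list is checked separately below.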
+ + if not self.cx.parsers: + raise Exception( + f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") + + def tag_output(self, output: dict, tags: dict = {}): + for value in output.values(): + if isinstance(value, dict): + self.tag_output(value, tags) + elif isinstance(value, list): + for v in value: + self.tag_output(value, tags) + + if isinstance(value, str): + if regex.search(IP_ONLY_REGEX, value): + tags['network.static.ip'].append(value) + elif regex.search(DOMAIN_ONLY_REGEX, value): + tags['network.static.domain'].append(value) + elif regex.search(FULL_URI, value): + tags['network.static.uri'].append(value) + + def execute(self, request): + result = Result() + config_result = self.cx.run_parsers(request.file_path) + tags = defaultdict(list) + self.tag_output(config_result, tags) + result.add_section(ResultSection('Output', body=json.dumps(config_result), + body_format=BODY_FORMAT.JSON, tags=tags)) + request.result = result diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py new file mode 100644 index 0000000..a91de00 --- /dev/null +++ b/configextractor_/update_server.py @@ -0,0 +1,87 @@ +import os +import shutil +import tempfile + +from assemblyline.common import forge +from assemblyline.common.isotime import epoch_to_iso +from assemblyline.odm.models.signature import Signature +from assemblyline_client import get_client +from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER +from configextractor.main import ConfigExtractor + + +classification = forge.get_classification() + + +class CXUpdateServer(ServiceUpdater): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def import_update(self, files_sha256, client, source_name, default_classification=classification.UNRESTRICTED): + def import_parsers(cx: ConfigExtractor): + upload_list = list() + parser_paths = cx.parsers.keys() + for parser_path in parser_paths: + parser_details = cx.get_details(parser_path) + if parser_details: + upload_list.append(Signature(dict( + classification=parser_details['classification'] or default_classification, + data=open(parser_path, 'r').read(), + name=parser_details['name'], + signature_id=f"{parser_details['framework']}_{os.path.basename(parser_path)}", + source=source_name, + type='configextractor', + status="DEPLOYED", + )).as_primitives()) + return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False) + + for dir, _ in files_sha256: + # Remove cached duplicates + dir = dir[:-1] + self.log.info(dir) + cx = ConfigExtractor(parsers_dir=dir, logger=self.log, check_extension=True) + resp = import_parsers(cx) + self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.debug(resp) + + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers + try: + shutil.move(dir, os.path.join(self.latest_updates_dir, source_name)) + except shutil.Error as e: + if 'already exists' in str(e): + continue + raise e + + def do_local_update(self) -> None: + old_update_time = self.get_local_update_time() + if not os.path.exists(UPDATER_DIR): + os.makedirs(UPDATER_DIR) + + _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) + self.log.info("Setup service account.") + username = self.ensure_service_account() + self.log.info("Create temporary API key.") + with temporary_api_key(self.datastore,
username) as api_key: + self.log.info(f"Connecting to Assemblyline API: {UI_SERVER}") + al_client = get_client(UI_SERVER, apikey=(username, api_key), verify=False) + + # Check if new signatures have been added + self.log.info("Check for new signatures.") + if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', + sig_type=self.updater_type)['update_available']: + self.log.info("An update is available for download from the datastore") + self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}") + + blocklisted_parsers = list() + [blocklisted_parsers.extend(list(item.values())) + for item in al_client.search.signature(f'type:{self.updater_type} AND status:DISABLED', + fl='id')['items']] + self.log.debug(f'Blocking the following parsers: {blocklisted_parsers}') + output_directory = self.prepare_output_directory() + open(os.path.join(output_directory, 'blocked_parsers'), 'w').write('\n'.join(blocklisted_parsers)) + self.serve_directory(output_directory, time_keeper) + + +if __name__ == '__main__': + with CXUpdateServer() as server: + server.serve_forever() diff --git a/service_manifest.yml b/service_manifest.yml index 374b7a6..6227725 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -17,20 +17,14 @@ timeout: 10 disable_cache: false -enabled: true +# Disabled at first so the Administrator can assign appropriate storage class +enabled: false is_external: false licence_count: 0 -uses_tags: true - -submission_params: - # Use CAPE parsers - - name: use_cape - type: bool - value: false - default: false +uses_tags: false # Service configuration block (dictionary of config variables) heuristics: @@ -59,3 +53,33 @@ docker_config: image: ${REGISTRY}cccs/assemblyline-service-configextractor:$SERVICE_TAG cpu_cores: 1.0 ram_mb: 1024 + +dependencies: + updates: + container: + allow_internet_access: true + command: ["python", "-m", "configextractor_.update_server"] + image: ${REGISTRY}cccs/assemblyline-service-configextractor:$SERVICE_TAG + ports: ["5003"] + environment: + - name: UPDATER_DIR + value: /mnt/updates + + run_as_core: True + volumes: + updates: + mount_path: /mnt/updates + capacity: 5120 + storage_class: default + + +update_config: + generates_signatures: true + sources: + - name: CAPE + pattern: .*/modules/processing/parsers/CAPE/$ + uri: https://github.com/kevoreilly/CAPEv2.git + classification: TLP:W + update_interval_seconds: 21600 # Quarter-day (every 6 hours) + wait_for_update: true + signature_delimiter: file From dacc6d9774ec653afa4fb1bf1d0948c993b88baa Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:44:25 +0000 Subject: [PATCH 02/23] fix classification setting for source --- service_manifest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service_manifest.yml b/service_manifest.yml index 6227725..2969e1c 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -79,7 +79,7 @@ update_config: - name: CAPE pattern: .*/modules/processing/parsers/CAPE/$ uri: https://github.com/kevoreilly/CAPEv2.git - classification: TLP:W + default_classification: TLP:W update_interval_seconds: 21600 # Quarter-day (every 6 hours) wait_for_update: true signature_delimiter: file From 25be8ff439c2361a43fc9c66925ae52ef80ba86f Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 14:49:50 +0000 Subject: [PATCH 03/23] remove testing for now --- pipelines/azure-build.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/azure-build.yaml
b/pipelines/azure-build.yaml index 0a8a036..0ad07be 100644 --- a/pipelines/azure-build.yaml +++ b/pipelines/azure-build.yaml @@ -27,9 +27,9 @@ stages: if [[ "$TAG" == *stable* ]]; then export BUILD_TYPE=stable; else export BUILD_TYPE=latest; fi docker build --build-arg version=$TAG --build-arg branch=$BUILD_TYPE -t cccs/assemblyline-service-configextractor:$TAG -t cccs/assemblyline-service-configextractor:$BUILD_TYPE -f ./Dockerfile . displayName: Build containers - - script: | - docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest' - displayName: Test containers + # - script: | + # docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest' + # displayName: Test containers - script: | docker push cccs/assemblyline-service-configextractor --all-tags displayName: Deploy to Docker Hub From a8f6287ae101e11f306eaec2fed5cf5255f25c26 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 15:00:37 +0000 Subject: [PATCH 04/23] fix kwarg on update --- configextractor_/update_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index a91de00..4a97574 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -39,7 +39,7 @@ def import_parsers(cx: ConfigExtractor): # Remove cached duplicates dir = dir[:-1] self.log.info(dir) - cx = ConfigExtractor(parsers_dir=dir, logger=self.log, check_extension=True) + cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log, check_extension=True) resp = import_parsers(cx) self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") self.log.debug(resp) From 3366f0cfe8e4199337363d886943c006a63b6391 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Jul 2022 15:43:31 +0000 Subject: [PATCH 05/23] only split on the first 3 --- configextractor_/configextractor_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index bebe6ed..4c77a40 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -43,7 +43,7 @@ def _load_rules(self) -> None: blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): - _, source, _, parser_name = line.split('_') + _, source, _, parser_name = line.split('_', 3) blocklist.append(rf"*{parser_name}$") self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) From 1cd34bad8d1eed09053201f3f859a36b4bd8b56f Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 15:54:29 +0000 Subject: [PATCH 06/23] fix regex for blocklist --- configextractor_/configextractor_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 4c77a40..d74e317 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -44,7 +44,7 @@ def _load_rules(self) -> None: if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) -
blocklist.append(rf"*{parser_name}$") + blocklist.append(rf".*{parser_name}$") self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) From b288d18dea7136a536561b74554aa46ada223f8a Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 17:32:50 +0000 Subject: [PATCH 07/23] make tagging more robust --- configextractor_/configextractor_.py | 34 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index d74e317..1144a3c 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -1,4 +1,5 @@ from collections import defaultdict +from typing import Any from assemblyline.common import forge from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX @@ -55,21 +56,28 @@ def _load_rules(self) -> None: raise Exception( f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") - def tag_output(self, output: dict, tags: dict = {}): - for value in output.values(): - if isinstance(value, dict): - self.tag_output(value, tags) - elif isinstance(value, list): - for v in value: + # Temporary tagging method until CAPE is switched over to MACO modelling + def tag_output(self, output: Any, tags: dict = {}): + def tag_string(value): + if regex.search(IP_ONLY_REGEX, value): + tags['network.static.ip'].append(value) + elif regex.search(DOMAIN_ONLY_REGEX, value): + tags['network.static.domain'].append(value) + elif regex.search(FULL_URI, value): + tags['network.static.uri'].append(value) + + if isinstance(output, dict): + # Iterate over valuse of dictionary + for value in output.values(): + if isinstance(value, dict): self.tag_output(value, tags) + elif isinstance(value, list): + [self.tag_output(v, tags) for v in value] + elif isinstance(value, str): + tag_string(value) - if isinstance(value, str): - if regex.search(IP_ONLY_REGEX, value): - tags['network.static.ip'].append(value) - elif regex.search(DOMAIN_ONLY_REGEX, value): - tags['network.static.domain'].append(value) - elif regex.search(FULL_URI, value): - tags['network.static.uri'].append(value) + elif isinstance(value, str): + tag_string(value) def execute(self, request): result = Result() From f5eba84a4ce53ce571f22a595dc9691534618077 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 2 Aug 2022 17:55:04 +0000 Subject: [PATCH 08/23] fix copy-paste --- configextractor_/configextractor_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 1144a3c..793ff12 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -76,8 +76,8 @@ def tag_string(value): elif isinstance(value, str): tag_string(value) - elif isinstance(value, str): - tag_string(value) + elif isinstance(output, str): + tag_string(output) def execute(self, request): result = Result() From 989e0bffe9b334b5a82d369aea9fd7e9a9de3977 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 15 Aug 2022 13:01:05 +0000 Subject: [PATCH 09/23] Handle updating sources better; misc. 
changes --- Dockerfile | 8 ++++---- configextractor_/update_server.py | 29 ++++++++++++++++++----------- service_manifest.yml | 6 +++--- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2bf8b55..6669d06 100755 --- a/Dockerfile +++ b/Dockerfile @@ -2,13 +2,13 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch AS base ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor -ENV YARA_VERSION=4.2.0 +ENV YARA_VERSION=4.2.3 USER assemblyline -#RUN pip uninstall -y yara-python +RUN pip uninstall -y yara-python USER root -RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* # Create a temporary image to do our compiling in FROM base AS build @@ -32,7 +32,7 @@ RUN touch /tmp/before-pip RUN pip install -U git+https://github.com/CybercentreCanada/configextractor-py@revamp RUN pip install -U git+https://github.com/CybercentreCanada/maco -RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git +RUN pip install --no-cache-dir --user --global-option="build" --global-option="--enable-dotnet" --global-option="--enable-magic" git+https://github.com/VirusTotal/yara-python.git@d29ca083f4cb25ea52988314b844bb7cf8594b5b RUN pip install --no-cache-dir --user gitpython plyara markupsafe==2.0.1 # Public libraries that can be used by parsers diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 4a97574..58d4187 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -21,6 +21,7 @@ def import_update(self, files_sha256, client, source_name, default_classificatio def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() + self.log.debug(f"Importing following parsers: {parser_paths}") for parser_path in parser_paths: parser_details = cx.get_details(parser_path) if parser_details: @@ -39,18 +40,24 @@ def import_parsers(cx: ConfigExtractor): # Remove cached duplicates dir = dir[:-1] self.log.info(dir) - cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log, check_extension=True) - resp = import_parsers(cx) - self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") - self.log.debug(resp) + cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log) + if cx.parsers: + self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") + resp = import_parsers(cx) + self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.debug(resp) - # Save a local copy of the directory that may potentially contain dependency libraries for the parsers - try: - shutil.move(dir, os.path.join(self.latest_updates_dir, source_name)) - except shutil.Error as e: - if 'already exists' in str(e): - continue - raise e + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers + try: + destination = os.path.join(self.latest_updates_dir, source_name) + # Removing old version of directory if exists + if os.path.exists(destination): + shutil.rmtree(destination) + shutil.move(dir, destination) + except shutil.Error as e: + if 'already exists' in str(e): + continue + raise e def do_local_update(self) -> None: old_update_time =
self.get_local_update_time() diff --git a/service_manifest.yml b/service_manifest.yml index 2969e1c..9f458c6 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -69,16 +69,16 @@ dependencies: volumes: updates: mount_path: /mnt/updates - capacity: 5120 + capacity: 1048576 #1Gi storage_class: default - update_config: generates_signatures: true sources: + # Pending: https://github.com/kevoreilly/CAPEv2/pull/1037 - name: CAPE pattern: .*/modules/processing/parsers/CAPE/$ - uri: https://github.com/kevoreilly/CAPEv2.git + uri: https://github.com/cccs-rs/CAPEv2.git default_classification: TLP:W update_interval_seconds: 21600 # Quarter-day (every 6 hours) wait_for_update: true From 28e3f4a7149d76d4308b2623be0e7280008340a1 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Tue, 16 Aug 2022 18:15:54 +0000 Subject: [PATCH 10/23] Update heuristics; Network IOCs section --- configextractor_/configextractor_.py | 59 +++++++++++++++++++++++++--- service_manifest.yml | 22 ++++++----- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 793ff12..3cf0714 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -5,18 +5,21 @@ from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX from assemblyline.odm.models.tagging import Tagging from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT +from assemblyline_v4_service.common.result import Result, ResultSection, ResultTableSection, BODY_FORMAT, TableRow, Heuristic import json import hashlib import os import regex +import tempfile from configextractor.main import ConfigExtractor as CX - +from maco.model import ExtractorModel, ConnUsageEnum cl_engine = forge.get_classification() +CONNECTION_USAGE = [k.name for k in ConnUsageEnum] + class ConfigExtractor(ServiceBase): def __init__(self, config=None): @@ -79,11 +82,55 @@ def tag_string(value): elif isinstance(output, str): tag_string(output) + def network_ioc_section(self, config) -> ResultSection: + network_section = ResultSection("Network IOCs") + + network_fields = { + 'ftp': ExtractorModel.FTP, + 'smtp': ExtractorModel.SMTP, + 'http': ExtractorModel.Http, + 'ssh': ExtractorModel.SSH, + 'proxy': ExtractorModel.Proxy, + 'dns': ExtractorModel.DNS, + 'tcp': ExtractorModel.Connection, + 'udp': ExtractorModel.Connection + } + for field, model in network_fields.items(): + sorted_network_config = {} + for network_config in config.get(field, []): + sorted_network_config.setdefault(network_config.get('usage', 'other'), []).append(network_config) + + if sorted_network_config: + connection_section = ResultSection(field.upper(), parent=network_section) + for usage, connections in sorted_network_config.items(): + tags = list() + self.tag_output(connections, tags) + table_section = ResultTableSection(title_text=f"Usage: {usage.upper()} x{len(connections)}", parent=connection_section, heuristic=Heuristic(2, signature=usage), tags=tags) + [table_section.add_row(TableRow(**model(**c).dict())) for c in connections] + + if network_section.subsections: + return network_section + def execute(self, request): result = Result() config_result = self.cx.run_parsers(request.file_path) - tags = defaultdict(list) - self.tag_output(config_result, tags) - result.add_section(ResultSection('Output', body=json.dumps(config_result), - body_format=BODY_FORMAT.JSON, tags=tags)) + if not config_result: + 
request.result = result + return + + a = tempfile.NamedTemporaryFile(delete=False) + a.write(json.dumps(config_result).encode()) + a.seek(0) + request.add_supplementary(a.name, f"{request.sha256}_malware_config.json", "Raw output from configextractor-py") + for parser_framework, parser_results in config_result.items(): + framework_section = ResultSection(parser_framework, parent=result, auto_collapse=True) + for parser_name, parser_output in parser_results.items(): + config = parser_output.pop('config') + parser_output['family'] = config.pop('family') + parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE) + network_section = self.network_ioc_section(config) + if network_section: + parser_section.add_subsection(network_section) + + request.result = result diff --git a/service_manifest.yml b/service_manifest.yml index 9f458c6..fa34b04 100755 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -33,18 +33,22 @@ heuristics: score: 1000 filetype: "*" description: Category - Malware - Indicates configuration block was extracted - - heur_id: 2 - name: Safe - score: -1000 - filetype: "*" - description: Category - Safe - - - heur_id: 3 name: De-obfuscated Network IOCs - score: 1000 + score: 50 filetype: "*" - description: Category - Malicious - Indicates a network IOC was extracted from malware configuration + signature_score_map: + # Connection usage may be indicative of maliciousness + c2: 1000 + upload: 1000 + download: 1000 + propagate: 1000 + tunnel: 1000 + ransom: 1000 + decoy: 10 # Used to mask actual malicious connection but the connections themselves aren't malicious + other: 10 + + description: Indicates a network IOC was extracted from malware configuration # Docker configuration block which defines: # - the name of the docker container that will be created From 3fcf1e0adf80898b2d7c45383e4b4979976e2207 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 16:40:34 +0000 Subject: [PATCH 11/23] Setup linking from results to signatures --- Dockerfile | 4 +++- configextractor_/configextractor_.py | 8 +++++++- configextractor_/update_server.py | 24 ++++++++++++++++++------ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6669d06..8d04c93 100755 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.3 USER assemblyline -RUN pip uninstall -y yara-python +#RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* @@ -47,6 +47,8 @@ COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local # Create directories RUN mkdir -p /mount/updates RUN mkdir -p /opt/al_service +RUN mkdir -p /updates +RUN chown -R assemblyline:assemblyline /updates # Copy service code WORKDIR /opt/al_service diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index 3cf0714..bf82417 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -25,6 +25,7 @@ class ConfigExtractor(ServiceBase): def __init__(self, config=None): super(ConfigExtractor, self).__init__(config) self.cx = None + self.source_map = None # Generate the rules_hash and init rules_list based on the raw files in the rules_directory from updater def _gen_rules_hash(self) -> str: @@ -45,6 +46,7 @@ def _load_rules(self) -> None: 
self.log.debug(self.rules_list) blocklist = [] blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') + self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) @@ -127,7 +129,11 @@ def execute(self, request): for parser_name, parser_output in parser_results.items(): config = parser_output.pop('config') parser_output['family'] = config.pop('family') - parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE) + id = f'{parser_framework}_{parser_name}' + id_details = self.source_map[id] + parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), + parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE, + tags={'file.rule.configextractor': f"{id_details['source_name']}.{parser_name}"}, classification=id_details['classification']) network_section = self.network_ioc_section(config) if network_section: parser_section.add_subsection(network_section) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 58d4187..0ca5b07 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -1,4 +1,5 @@ import os +import json import shutil import tempfile @@ -12,7 +13,6 @@ classification = forge.get_classification() - class CXUpdateServer(ServiceUpdater): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -22,19 +22,23 @@ def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() self.log.debug(f"Importing following parsers: {parser_paths}") + source_map = {} for parser_path in parser_paths: parser_details = cx.get_details(parser_path) if parser_details: + id = f"{parser_details['framework']}_{parser_details['name']}" + classification = parser_details['classification'] or default_classification + source_map[id] = dict(classification=classification, source_name=source_name) upload_list.append(Signature(dict( - classification=parser_details['classification'] or default_classification, + classification=classification, data=open(parser_path, 'r').read(), name=parser_details['name'], - signature_id=f"{parser_details['framework']}_{os.path.basename(parser_path)}", + signature_id=id, source=source_name, type='configextractor', status="DEPLOYED", )).as_primitives()) - return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False) + return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False), source_map for dir, _ in files_sha256: # Remove cached duplicates dir = dir[:-1] self.log.info(dir) cx = ConfigExtractor(parsers_dirs=[dir], logger=self.log) if cx.parsers: self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") - resp = import_parsers(cx) + resp, source_map = import_parsers(cx) self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.") self.log.debug(resp) + self.log.debug(source_map) # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) + source_mapping_file = os.path.join(self.latest_updates_dir, 'source_mapping.json') # Removing old version of directory if exists if
os.path.exists(destination): shutil.rmtree(destination) shutil.move(dir, destination) + if os.path.exists(source_mapping_file): + _tmp = json.loads(open(source_mapping_file, 'r').read()) + _tmp.update(source_map) + source_map = _tmp + + open(source_mapping_file, 'w').write(json.dumps(source_map)) except shutil.Error as e: if 'already exists' in str(e): continue @@ -64,7 +76,6 @@ def do_local_update(self) -> None: if not os.path.exists(UPDATER_DIR): os.makedirs(UPDATER_DIR) - _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) self.log.info("Setup service account.") username = self.ensure_service_account() self.log.info("Create temporary API key.") @@ -76,6 +87,7 @@ def do_local_update(self) -> None: self.log.info("Check for new signatures.") if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', sig_type=self.updater_type)['update_available']: + _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) self.log.info("An update is available for download from the datastore") self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}") From d504b4db0787fa4c5874fee31d15a425e96e36f0 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 16:43:39 +0000 Subject: [PATCH 12/23] undo dev changes in Dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8d04c93..6669d06 100755 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ ENV SERVICE_PATH configextractor_.configextractor_.ConfigExtractor ENV YARA_VERSION=4.2.3 USER assemblyline -#RUN pip uninstall -y yara-python +RUN pip uninstall -y yara-python USER root RUN apt-get update && apt-get install -y git libssl1.1 libmagic1 upx-ucl mono-complete && rm -rf /var/lib/apt/lists/* @@ -47,8 +47,6 @@ COPY --chown=assemblyline:assemblyline --from=build /var/lib/assemblyline/.local # Create directories RUN mkdir -p /mount/updates RUN mkdir -p /opt/al_service -RUN mkdir -p /updates -RUN chown -R assemblyline:assemblyline /updates # Copy service code WORKDIR /opt/al_service From 73b6d4314d658aa05a7ccf8deafe3685a3d2b6af Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 17:49:57 +0000 Subject: [PATCH 13/23] Allow updater to fetch python packages for service --- configextractor_/configextractor_.py | 5 +++++ configextractor_/update_server.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index bf82417..ca40572 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -11,6 +11,7 @@ import hashlib import os import regex +import sys import tempfile from configextractor.main import ConfigExtractor as CX @@ -47,6 +48,10 @@ def _load_rules(self) -> None: blocklist = [] blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) + python_packages_dir = os.path.join(self.rules_directory, 'python_packages') + if python_packages_dir not in sys.path: + sys.path.append(python_packages_dir) + if os.path.exists(blocklist_location): for line in open(blocklist_location, 'r').readlines(): _, source, _, parser_name = line.split('_', 3) diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index 0ca5b07..e84ef35 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -1,8 +1,10 @@ 
import os import json import shutil +import subprocess import tempfile + from assemblyline.common import forge from assemblyline.common.isotime import epoch_to_iso from assemblyline.odm.models.signature import Signature @@ -52,6 +54,17 @@ def import_parsers(cx: ConfigExtractor): self.log.debug(resp) self.log.debug(source_map) + # Find any requirement files and pip install to a specific directory that will get transferred to services + for root, _, files in os.walk(dir): + for file in files: + if file == "requirements.txt": + err = subprocess.run(['pip', 'install', + '-r', os.path.join(root, file), + '-t', os.path.join(self.latest_updates_dir, 'python_packages')]).stderr + if err: + self.log.error(err) + + # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) From 1ac3fda15ea638c31f2384160e78e882922178ae Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Wed, 17 Aug 2022 19:27:39 +0000 Subject: [PATCH 14/23] black formatting --- configextractor_/configextractor_.py | 153 ++++++++++++++++++++------- configextractor_/update_server.py | 114 ++++++++++++++------ 2 files changed, 194 insertions(+), 73 deletions(-) diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py index ca40572..d857ba8 100755 --- a/configextractor_/configextractor_.py +++ b/configextractor_/configextractor_.py @@ -1,11 +1,16 @@ -from collections import defaultdict from typing import Any from assemblyline.common import forge from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX -from assemblyline.odm.models.tagging import Tagging from assemblyline_v4_service.common.base import ServiceBase -from assemblyline_v4_service.common.result import Result, ResultSection, ResultTableSection, BODY_FORMAT, TableRow, Heuristic +from assemblyline_v4_service.common.result import ( + Result, + ResultSection, + ResultTableSection, + BODY_FORMAT, + TableRow, + Heuristic, +) import json import hashlib @@ -40,41 +45,54 @@ def _gen_rules_hash(self) -> str: if len(all_sha256s) == 1: return all_sha256s[0][:7] - return hashlib.sha256(' '.join(sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7] + return hashlib.sha256( + " ".join(sorted(all_sha256s)).encode("utf-8") + ).hexdigest()[:7] def _load_rules(self) -> None: if self.rules_list: self.log.debug(self.rules_list) blocklist = [] - blocklist_location = os.path.join(self.rules_directory, 'blocked_parsers') - self.source_map = json.loads(open(os.path.join(self.rules_directory, 'source_mapping.json')).read()) - python_packages_dir = os.path.join(self.rules_directory, 'python_packages') + blocklist_location = os.path.join(self.rules_directory, "blocked_parsers") + self.source_map = json.loads( + open(os.path.join(self.rules_directory, "source_mapping.json")).read() + ) + python_packages_dir = os.path.join(self.rules_directory, "python_packages") if python_packages_dir not in sys.path: sys.path.append(python_packages_dir) if os.path.exists(blocklist_location): - for line in open(blocklist_location, 'r').readlines(): - _, source, _, parser_name = line.split('_', 3) + for line in open(blocklist_location, "r").readlines(): + _, source, _, parser_name = line.split("_", 3) blocklist.append(rf".*{parser_name}$") - self.log.info(f'Blocking the following parsers matching these patterns: {blocklist}') - self.cx = CX(parsers_dirs=self.rules_list, logger=self.log, parser_blocklist=blocklist) + self.log.info( + f"Blocking the following parsers 
matching these patterns: {blocklist}" + ) + self.cx = CX( + parsers_dirs=self.rules_list, + logger=self.log, + parser_blocklist=blocklist, + ) if not self.cx: - raise Exception("Unable to start ConfigExtractor because can't find directory containing parsers") + raise Exception( + "Unable to start ConfigExtractor because can't find directory containing parsers" + ) if not self.cx.parsers: raise Exception( - f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}") + f"Unable to start ConfigExtractor because can't find parsers in given directory: {self.rules_directory}" + ) # Temporary tagging method until CAPE is switched over to MACO modelling def tag_output(self, output: Any, tags: dict = {}): def tag_string(value): if regex.search(IP_ONLY_REGEX, value): - tags['network.static.ip'].append(value) + tags["network.static.ip"].append(value) elif regex.search(DOMAIN_ONLY_REGEX, value): - tags['network.static.domain'].append(value) + tags["network.static.domain"].append(value) elif regex.search(FULL_URI, value): - tags['network.static.uri'].append(value) + tags["network.static.uri"].append(value) if isinstance(output, dict): # Iterate over values of dictionary for value in output.values(): @@ -93,27 +111,40 @@ def network_ioc_section(self, config) -> ResultSection: network_section = ResultSection("Network IOCs") network_fields = { - 'ftp': ExtractorModel.FTP, - 'smtp': ExtractorModel.SMTP, - 'http': ExtractorModel.Http, - 'ssh': ExtractorModel.SSH, - 'proxy': ExtractorModel.Proxy, - 'dns': ExtractorModel.DNS, - 'tcp': ExtractorModel.Connection, - 'udp': ExtractorModel.Connection + "ftp": ExtractorModel.FTP, + "smtp": ExtractorModel.SMTP, + "http": ExtractorModel.Http, + "ssh": ExtractorModel.SSH, + "proxy": ExtractorModel.Proxy, + "dns": ExtractorModel.DNS, + "tcp": ExtractorModel.Connection, + "udp": ExtractorModel.Connection, } for field, model in network_fields.items(): sorted_network_config = {} - for network_config in config.get(field, []): - sorted_network_config.setdefault(network_config.get('usage', 'other'), []).append(network_config) + for network_config in config.pop(field, []): + sorted_network_config.setdefault( + network_config.get("usage", "other"), [] + ).append(network_config) if sorted_network_config: - connection_section = ResultSection(field.upper(), parent=network_section) + connection_section = ResultSection( + field.upper(), parent=network_section + ) for usage, connections in sorted_network_config.items(): tags = list() - self.tag_output(connections, tags) - table_section = ResultTableSection(title_text=f"Usage: {usage.upper()} x{len(connections)}", parent=connection_section, heuristic=Heuristic(2, signature=usage), tags=tags) - [table_section.add_row(TableRow(**model(**c).dict())) for c in connections] + if usage not in ["decoy"]: + self.tag_output(connections, tags) + heuristic = Heuristic(2, signature=usage) + table_section = ResultTableSection( + title_text=f"Usage: {usage.upper()} x{len(connections)}", + parent=connection_section, + heuristic=heuristic, + tags=tags, + ) + for c in connections: + c.pop("usage") + table_section.add_row(TableRow(**model(**c).dict())) if network_section.subsections: return network_section @@ -128,20 +159,62 @@ def execute(self, request): result = Result() config_result = self.cx.run_parsers(request.file_path) if not config_result: request.result = result return a = tempfile.NamedTemporaryFile(delete=False) a.write(json.dumps(config_result).encode()) a.seek(0) - request.add_supplementary(a.name, f"{request.sha256}_malware_config.json", "Raw output from configextractor-py") + request.add_supplementary( + a.name, +
f"{request.sha256}_malware_config.json", + "Raw output from configextractor-py", + ) for parser_framework, parser_results in config_result.items(): - framework_section = ResultSection(parser_framework, parent=result, auto_collapse=True) + framework_section = ResultSection( + parser_framework, parent=result, auto_collapse=True + ) for parser_name, parser_output in parser_results.items(): - config = parser_output.pop('config') - parser_output['family'] = config.pop('family') - id = f'{parser_framework}_{parser_name}' - id_details = self.source_map[id] - parser_section = ResultSection(title_text=parser_name, body=json.dumps(parser_output), - parent=framework_section, body_format=BODY_FORMAT.KEY_VALUE, - tags={'file.rule.configextractor': f"{id_details['source_name']}.{parser_name}"}, classification=id_details['classification']) + # Get AL-specific details about the parser + id = f"{parser_framework}_{parser_name}" + classification = self.source_map[id]["classification"] + source_name = self.source_map[id]["source_name"] + + config = parser_output.pop("config") + + parser_output["family"] = config.pop("family") + parser_output["version"] = config.pop("version") + + tags = { + "file.rule.configextractor": [f"{source_name}.{parser_name}"], + "attribution.family": [parser_output["family"]], + } + attack_ids = config.pop("attack", []) + if config.get("category"): + category = config.pop("category") + parser_output["category"] = category + + if config.get("password"): + password = config.pop("password", []) + parser_output["password"] = password + tags.update({"info.password": password}) + + if config.get("campaign_id"): + campaign_id = config.pop("campaign_id", []) + parser_output["Campaigh ID"] = campaign_id + tags.update({"attribution.campaign": campaign_id}) + + parser_section = ResultSection( + title_text=parser_name, + body=json.dumps(parser_output), + parent=framework_section, + body_format=BODY_FORMAT.KEY_VALUE, + tags=tags, + heuristic=Heuristic(1, attack_ids=attack_ids), + classification=classification, + ) network_section = self.network_ioc_section(config) if network_section: parser_section.add_subsection(network_section) - + ResultSection( + "Other data", + body=json.dumps(config), + body_format=BODY_FORMAT.JSON, + parent=parser_section, + ) request.result = result diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py index e84ef35..3858e58 100644 --- a/configextractor_/update_server.py +++ b/configextractor_/update_server.py @@ -9,17 +9,29 @@ from assemblyline.common.isotime import epoch_to_iso from assemblyline.odm.models.signature import Signature from assemblyline_client import get_client -from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER +from assemblyline_v4_service.updater.updater import ( + ServiceUpdater, + temporary_api_key, + UPDATER_DIR, + UI_SERVER, +) from configextractor.main import ConfigExtractor classification = forge.get_classification() + class CXUpdateServer(ServiceUpdater): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def import_update(self, files_sha256, client, source_name, default_classification=classification.UNRESTRICTED): + def import_update( + self, + files_sha256, + client, + source_name, + default_classification=classification.UNRESTRICTED, + ): def import_parsers(cx: ConfigExtractor): upload_list = list() parser_paths = cx.parsers.keys() @@ -29,18 +41,31 @@ def import_parsers(cx: ConfigExtractor): parser_details = 
cx.get_details(parser_path) if parser_details: id = f"{parser_details['framework']}_{parser_details['name']}" - classification = parser_details['classification'] or default_classification - source_map[id] = dict(classification=classification, source_name=source_name) - upload_list.append(Signature(dict( - classification=classification, - data=open(parser_path, 'r').read(), - name=parser_details['name'], - signature_id=id, - source=source_name, - type='configextractor', - status="DEPLOYED", - )).as_primitives()) - return client.signature.add_update_many(source_name, 'configextractor', upload_list, dedup_name=False), source_map + classification = ( + parser_details["classification"] or default_classification + ) + source_map[id] = dict( + classification=classification, source_name=source_name + ) + upload_list.append( + Signature( + dict( + classification=classification, + data=open(parser_path, "r").read(), + name=parser_details["name"], + signature_id=id, + source=source_name, + type="configextractor", + status="DEPLOYED", + ) + ).as_primitives() + ) + return ( + client.signature.add_update_many( + source_name, "configextractor", upload_list, dedup_name=False + ), + source_map, + ) for dir, _ in files_sha256: # Remove cached duplicates @@ -50,7 +75,9 @@ def import_parsers(cx: ConfigExtractor): if cx.parsers: self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}") resp, source_map = import_parsers(cx) - self.log.info(f"Sucessfully added {resp['success']} parsers from source {source_name} to Assemblyline.") + self.log.info( + f"Sucessfully added {resp['success']} parsers from source {source_name} to Assemblyline." + ) self.log.debug(resp) self.log.debug(source_map) @@ -58,29 +85,40 @@ def import_parsers(cx: ConfigExtractor): for root, _, files in os.walk(dir): for file in files: if file == "requirements.txt": - err = subprocess.run(['pip', 'install', - '-r', os.path.join(root, file), - '-t', os.path.join(self.latest_updates_dir, 'python_packages')]).stderr + err = subprocess.run( + [ + "pip", + "install", + "-r", + os.path.join(root, file), + "-t", + os.path.join( + self.latest_updates_dir, "python_packages" + ), + ], + capture_output=True, + ).stderr if err: self.log.error(err) - # Save a local copy of the directory that may potentially contain dependency libraries for the parsers try: destination = os.path.join(self.latest_updates_dir, source_name) - source_mapping_file = os.path.join(self.latest_updates_dir, 'source_mapping.json') + source_mapping_file = os.path.join( + self.latest_updates_dir, "source_mapping.json" + ) # Removing old version of directory if exists if os.path.exists(destination): shutil.rmtree(destination) shutil.move(dir, destination) if os.path.exists(source_mapping_file): - _tmp = json.loads(open(source_mapping_file, 'r').read()) + _tmp = json.loads(open(source_mapping_file, "r").read()) _tmp.update(source_map) source_map = _tmp - open(source_mapping_file, 'w').write(json.dumps(source_map)) + open(source_mapping_file, "w").write(json.dumps(source_map)) except shutil.Error as e: - if 'already exists' in str(e): + if "already exists" in str(e): continue raise e @@ -98,22 +136,32 @@ def do_local_update(self) -> None: # Check if new signatures have been added self.log.info("Check for new signatures.") - if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or '', - sig_type=self.updater_type)['update_available']: - _, time_keeper = tempfile.mkstemp(prefix="time_keeper_", dir=UPDATER_DIR) + if al_client.signature.update_available( + 
+            since=epoch_to_iso(old_update_time) or "", sig_type=self.updater_type
+        )["update_available"]:
+            _, time_keeper = tempfile.mkstemp(
+                prefix="time_keeper_", dir=UPDATER_DIR
+            )
             self.log.info("An update is available for download from the datastore")
-            self.log.debug(f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}")
+            self.log.debug(
+                f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}"
+            )
 
             blocklisted_parsers = list()
-            [blocklisted_parsers.extend(list(item.values()))
-             for item in al_client.search.signature(f'type:{self.updater_type} AND status:DISABLED',
-                                                    fl='id')['items']]
-            self.log.debug(f'Blocking the following parsers: {blocklisted_parsers}')
+            [
+                blocklisted_parsers.extend(list(item.values()))
+                for item in al_client.search.signature(
+                    f"type:{self.updater_type} AND status:DISABLED", fl="id"
+                )["items"]
+            ]
+            self.log.debug(f"Blocking the following parsers: {blocklisted_parsers}")
             output_directory = self.prepare_output_directory()
-            open(os.path.join(output_directory, 'blocked_parsers'), 'w').write('\n'.join(blocklisted_parsers))
+            open(os.path.join(output_directory, "blocked_parsers"), "w").write(
+                "\n".join(blocklisted_parsers)
+            )
             self.serve_directory(output_directory, time_keeper)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     with CXUpdateServer() as server:
         server.serve_forever()

From 93560b8b5f01dcf3e67a97eb9284ecc0bc0c96b9 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Wed, 17 Aug 2022 19:43:29 +0000
Subject: [PATCH 15/23] version fix

---
 configextractor_/configextractor_.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index d857ba8..07630c3 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -184,9 +184,9 @@ def execute(self, request):
                     "attribution.family": [parser_output["family"]],
                 }
                 attack_ids = config.pop("attack", [])
-                if config.get("category"):
-                    category = config.pop("category")
-                    parser_output["category"] = category
+                for field in ["category", "version"]:
+                    if config.get(field):
+                        parser_output[field] = config.pop(field)
 
                 if config.get("password"):
                     password = config.pop("password", [])

From b1ebc5b93c3219a2e0ebcc634888dd5309acb794 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Thu, 18 Aug 2022 17:43:30 +0000
Subject: [PATCH 16/23] Cleanup nesting; fix pop bug

---
 configextractor_/configextractor_.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 07630c3..db7731f 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -143,7 +143,7 @@ def network_ioc_section(self, config) -> ResultSection:
                         tags=tags,
                     )
                     for c in connections:
-                        c.pop("usage")
+                        c.pop("usage", None)
                         table_section.add_row(TableRow(**model(**c).dict()))
 
         if network_section.subsections:
@@ -165,9 +165,6 @@ def execute(self, request):
                 "Raw output from configextractor-py",
             )
         for parser_framework, parser_results in config_result.items():
-            framework_section = ResultSection(
-                parser_framework, parent=result, auto_collapse=True
-            )
             for parser_name, parser_output in parser_results.items():
                 # Get AL-specific details about the parser
                 id = f"{parser_framework}_{parser_name}"
@@ -177,7 +174,7 @@ def execute(self, request):
                 config = parser_output.pop("config")
 
                 parser_output["family"] = config.pop("family")
-                parser_output["version"] = config.pop("version")
+                parser_output["Framework"] = parser_framework
 
                 tags = {
                     "file.rule.configextractor": [f"{source_name}.{parser_name}"],
@@ -201,7 +198,7 @@ def execute(self, request):
                 parser_section = ResultSection(
                     title_text=parser_name,
                     body=json.dumps(parser_output),
-                    parent=framework_section,
+                    parent=result,
                     body_format=BODY_FORMAT.KEY_VALUE,
                     tags=tags,
                     heuristic=Heuristic(1, attack_ids=attack_ids),

From 24b451b60b5f9bfb432c0790edded9d72a03242c Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 14:20:49 +0000
Subject: [PATCH 17/23] Fix blocking parsers

---
 configextractor_/configextractor_.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index db7731f..36547cb 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -62,7 +62,7 @@ def _load_rules(self) -> None:
         sys.path.append(python_packages_dir)
 
         if os.path.exists(blocklist_location):
-            for line in open(blocklist_location, "r").readlines():
+            for line in open(blocklist_location, "r").read().splitlines():
                 _, source, _, parser_name = line.split("_", 3)
                 blocklist.append(rf".*{parser_name}$")
             self.log.info(

From 12f9deabfaaedd8614466f77aff3e83c88f9f811 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 16:19:31 +0000
Subject: [PATCH 18/23] define tagging for MACO network models

---
 configextractor_/configextractor_.py |  26 +++---
 configextractor_/maco_tags.py        | 113 +++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 configextractor_/maco_tags.py

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 36547cb..71d244d 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -20,6 +20,7 @@
 import tempfile
 
 from configextractor.main import ConfigExtractor as CX
+from configextractor_.maco_tags import extract_connection_tags, extract_DNS_tags, extract_FTP_tags, extract_HTTP_tags, extract_proxy_tags, extract_SMTP_tags, extract_SSH_tags
 from maco.model import ExtractorModel, ConnUsageEnum
 
 cl_engine = forge.get_classification()
@@ -111,16 +112,16 @@ def network_ioc_section(self, config) -> ResultSection:
         network_section = ResultSection("Network IOCs")
         network_fields = {
-            "ftp": ExtractorModel.FTP,
-            "smtp": ExtractorModel.SMTP,
-            "http": ExtractorModel.Http,
-            "ssh": ExtractorModel.SSH,
-            "proxy": ExtractorModel.Proxy,
-            "dns": ExtractorModel.DNS,
-            "tcp": ExtractorModel.Connection,
-            "udp": ExtractorModel.Connection,
+            "ftp": (ExtractorModel.FTP, extract_FTP_tags),
+            "smtp": (ExtractorModel.SMTP, extract_SMTP_tags),
+            "http": (ExtractorModel.Http, extract_HTTP_tags),
+            "ssh": (ExtractorModel.SSH, extract_SSH_tags),
+            "proxy": (ExtractorModel.Proxy, extract_proxy_tags),
+            "dns": (ExtractorModel.DNS, extract_DNS_tags),
+            "tcp": (ExtractorModel.Connection, extract_connection_tags),
+            "udp": (ExtractorModel.Connection, extract_connection_tags),
         }
-        for field, model in network_fields.items():
+        for field, model_tuple in network_fields.items():
             sorted_network_config = {}
             for network_config in config.pop(field, []):
                 sorted_network_config.setdefault(
@@ -132,9 +133,9 @@ def network_ioc_section(self, config) -> ResultSection:
                     field.upper(), parent=network_section
                 )
                 for usage, connections in sorted_network_config.items():
-                    tags = list()
+                    model, tag_extractor = model_tuple
                     if usage not in ["decoy"]:
-                        self.tag_output(connections, tags)
+                        tags = tag_extractor(connections)
                     heuristic = Heuristic(2, signature=usage)
                     table_section = ResultTableSection(
                         title_text=f"Usage: {usage.upper()} x{len(connections)}",
@@ -207,11 +208,14 @@ def execute(self, request):
                 network_section = self.network_ioc_section(config)
                 if network_section:
                     parser_section.add_subsection(network_section)
+                other_tags = {}
+                self.tag_output(config, other_tags)
                 ResultSection(
                     "Other data",
                     body=json.dumps(config),
                     body_format=BODY_FORMAT.JSON,
                     parent=parser_section,
+                    tags=other_tags
                 )
 
         request.result = result
diff --git a/configextractor_/maco_tags.py b/configextractor_/maco_tags.py
new file mode 100644
index 0000000..44477a0
--- /dev/null
+++ b/configextractor_/maco_tags.py
@@ -0,0 +1,113 @@
+# Documents how Model objects in the MACO standard translate to Assemblyline tags
+
+from typing import Dict, List
+
+
+def extract_FTP_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['FTP']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('path'):
+            tags.setdefault('file.path', []).append(d['path'])
+
+    return tags
+
+
+def extract_SMTP_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['SMTP']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('mail_to'):
+            tags.setdefault('network.email.address', []).extend(d['mail_to'])
+        if d.get('mail_from'):
+            tags.setdefault('network.email.address', []).append(d['mail_from'])
+        if d.get('subject'):
+            tags.setdefault('network.email.subject', []).append(d['subject'])
+
+    return tags
+
+
+def extract_HTTP_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        tags.setdefault('network.protocol', []).append(d.get('protocol', 'HTTP').upper())
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+        if d.get('uri'):
+            tags.setdefault('network.static.uri', []).extend(d['uri'])
+        if d.get('path'):
+            tags.setdefault('network.static.uri_path', []).extend(d['path'])
+        if d.get('user_agent'):
+            tags.setdefault('network.user_agent', []).append(d['user_agent'])
+
+    return tags
+
+
+def extract_SSH_tags(data: List[Dict]) -> Dict:
+    tags = {'network.protocol': ['SSH']}
+    for d in data:
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_proxy_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        if d.get('protocol'):
+            tags.setdefault('network.protocol', []).append(d['protocol'])
+        if d.get('password'):
+            tags.setdefault('info.password', []).append(d['password'])
+        if d.get('hostname'):
+            tags.setdefault('network.static.domain', []).append(d['hostname'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_DNS_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        if d.get('ip'):
+            tags.setdefault('network.static.ip', []).append(d['ip'])
+        if d.get('port'):
+            tags.setdefault('network.port', []).append(d['port'])
+
+    return tags
+
+
+def extract_connection_tags(data: List[Dict]) -> Dict:
+    tags = {}
+    for d in data:
+        for side in ['client', 'server']:
+            if d.get(f'{side}_ip'):
+                tags.setdefault('network.static.ip', []).append(d[f'{side}_ip'])
+            if d.get(f'{side}_port'):
+                tags.setdefault('network.port', []).append(d[f'{side}_port'])
+            if d.get(f'{side}_domain'):
+                tags.setdefault('network.static.domain', []).append(d[f'{side}_domain'])
+
+    return tags

From b3c74ce6d891389701575e4b3050ba1cd56d1b0e Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Fri, 19 Aug 2022 16:45:57 +0000
Subject: [PATCH 19/23] set default for misc. tagging

---
 configextractor_/configextractor_.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 71d244d..6dc6a83 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -89,11 +89,11 @@ def _load_rules(self) -> None:
     def tag_output(self, output: Any, tags: dict = {}):
         def tag_string(value):
             if regex.search(IP_ONLY_REGEX, value):
-                tags["network.static.ip"].append(value)
+                tags.setdefault("network.static.ip", []).append(value)
             elif regex.search(DOMAIN_ONLY_REGEX, value):
-                tags["network.static.domain"].append(value)
+                tags.setdefault("network.static.domain", []).append(value)
             elif regex.search(FULL_URI, value):
-                tags["network.static.uri"].append(value)
+                tags.setdefault("network.static.uri", []).append(value)
 
         if isinstance(output, dict):
             # Iterate over values of dictionary

From 4ddc335547f1e5dbf30285794868e9945c2f35ae Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Mon, 22 Aug 2022 15:09:46 +0000
Subject: [PATCH 20/23] don't display 'other' network IOCs

---
 configextractor_/configextractor_.py |  2 +-
 configextractor_/update_server.py    | 56 ++++++++--------------------
 2 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index 6dc6a83..aeae4f9 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -134,7 +134,7 @@ def network_ioc_section(self, config) -> ResultSection:
                 )
                 for usage, connections in sorted_network_config.items():
                     model, tag_extractor = model_tuple
-                    if usage not in ["decoy"]:
+                    if usage not in ["decoy", "other"]:
                         tags = tag_extractor(connections)
                     heuristic = Heuristic(2, signature=usage)
                     table_section = ResultTableSection(
diff --git a/configextractor_/update_server.py b/configextractor_/update_server.py
index 3858e58..a93f87e 100644
--- a/configextractor_/update_server.py
+++ b/configextractor_/update_server.py
@@ -9,12 +9,8 @@
 from assemblyline.common.isotime import epoch_to_iso
 from assemblyline.odm.models.signature import Signature
 from assemblyline_client import get_client
-from assemblyline_v4_service.updater.updater import (
-    ServiceUpdater,
-    temporary_api_key,
-    UPDATER_DIR,
-    UI_SERVER,
-)
+from assemblyline_v4_service.updater.updater import ServiceUpdater, temporary_api_key, UPDATER_DIR, UI_SERVER
+
 
 from configextractor.main import ConfigExtractor
 
@@ -37,12 +33,8 @@ def import_parsers(cx: ConfigExtractor):
             parser_details = cx.get_details(parser_path)
             if parser_details:
                 id = f"{parser_details['framework']}_{parser_details['name']}"
-                classification = (
-                    parser_details["classification"] or default_classification
-                )
-                source_map[id] = dict(
-                    classification=classification, source_name=source_name
-                )
+                classification = parser_details["classification"] or default_classification
+                source_map[id] = dict(classification=classification, source_name=source_name)
                 upload_list.append(
                     Signature(
                         dict(
                             classification=classification,
                             data=open(parser_path, "r").read(),
                             name=parser_details["name"],
                             signature_id=id,
                             source=source_name,
                             type="configextractor",
                             status="DEPLOYED",
                         )
                     ).as_primitives()
                 )
             return (
-                client.signature.add_update_many(
-                    source_name, "configextractor", upload_list, dedup_name=False
-                ),
+                client.signature.add_update_many(source_name, "configextractor", upload_list, dedup_name=False),
                 source_map,
             )
@@ -75,9 +65,7 @@ def import_parsers(cx: ConfigExtractor):
             if cx.parsers:
                 self.log.info(f"Found {len(cx.parsers)} parsers from {source_name}")
                 resp, source_map = import_parsers(cx)
-                self.log.info(
-                    f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline."
-                )
+                self.log.info(f"Successfully added {resp['success']} parsers from source {source_name} to Assemblyline.")
                 self.log.debug(resp)
                 self.log.debug(source_map)
@@ -87,14 +75,9 @@ def import_parsers(cx: ConfigExtractor):
                     if file == "requirements.txt":
                         err = subprocess.run(
                             [
-                                "pip",
-                                "install",
-                                "-r",
-                                os.path.join(root, file),
-                                "-t",
-                                os.path.join(
-                                    self.latest_updates_dir, "python_packages"
-                                ),
+                                "pip", "install",
+                                "-r", os.path.join(root, file),
+                                "-t", os.path.join(self.latest_updates_dir, "python_packages"),
                             ],
                             capture_output=True,
                         ).stderr
                         if err:
                             self.log.error(err)
             # Save a local copy of the directory that may potentially contain dependency libraries for the parsers
             try:
                 destination = os.path.join(self.latest_updates_dir, source_name)
-                source_mapping_file = os.path.join(
-                    self.latest_updates_dir, "source_mapping.json"
-                )
+                source_mapping_file = os.path.join(self.latest_updates_dir, "source_mapping.json")
                 # Removing old version of directory if exists
                 if os.path.exists(destination):
                     shutil.rmtree(destination)
@@ -117,22 +98,18 @@ def do_local_update(self) -> None:
 
         # Check if new signatures have been added
         self.log.info("Check for new signatures.")
-        if al_client.signature.update_available(
-            since=epoch_to_iso(old_update_time) or "", sig_type=self.updater_type
-        )["update_available"]:
+        if al_client.signature.update_available(since=epoch_to_iso(old_update_time) or "",
+                                                sig_type=self.updater_type)["update_available"]:
             _, time_keeper = tempfile.mkstemp(
                 prefix="time_keeper_", dir=UPDATER_DIR
             )
             self.log.info("An update is available for download from the datastore")
             self.log.debug(
                 f"{self.updater_type} update available since {epoch_to_iso(old_update_time) or ''}"
             )
 
             blocklisted_parsers = list()
-            [
-                blocklisted_parsers.extend(list(item.values()))
-                for item in al_client.search.signature(
-                    f"type:{self.updater_type} AND status:DISABLED", fl="id"
-                )["items"]
-            ]
+            [blocklisted_parsers.extend(list(item.values())) for item in
+             al_client.search.signature(f"type:{self.updater_type} AND status:DISABLED", fl="id")["items"]]
             self.log.debug(f"Blocking the following parsers: {blocklisted_parsers}")
             output_directory = self.prepare_output_directory()
-            open(os.path.join(output_directory, "blocked_parsers"), "w").write(
-                "\n".join(blocklisted_parsers)
-            )
+            open(os.path.join(output_directory, "blocked_parsers"), "w").write("\n".join(blocklisted_parsers))
             self.serve_directory(output_directory, time_keeper)

From f96461a7147077633e0ed8cd0449fe1938474f8e Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 11:16:26 +0000
Subject: [PATCH 21/23] Attach ontology

---
 configextractor_/configextractor_.py | 24 ++++++++++++++++++++++--
 configextractor_/maco_tags.py        |  4 ++--
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/configextractor_/configextractor_.py b/configextractor_/configextractor_.py
index aeae4f9..64d35c8 100755
--- a/configextractor_/configextractor_.py
+++ b/configextractor_/configextractor_.py
@@ -1,7 +1,8 @@
 from typing import Any
 
-from assemblyline.common import forge
+from assemblyline.common import forge, attack_map
 from assemblyline.odm.base import IP_ONLY_REGEX, FULL_URI, DOMAIN_ONLY_REGEX
+from assemblyline.odm.models.ontology.results import MalwareConfig
 from assemblyline_v4_service.common.base import ServiceBase
 from assemblyline_v4_service.common.result import (
     Result,
@@ -150,6 +151,20 @@ def network_ioc_section(self, config) -> ResultSection:
         if network_section.subsections:
             return network_section
 
+    def attach_ontology(self, config: dict):
+        def strip_null(d: dict):
+            clean_config = {}
+            for k, v in d.items():
+                if v:
+                    if isinstance(v, dict):
+                        clean_config[k] = strip_null(v)
+                    elif isinstance(v, list) and isinstance(v[0], dict):
+                        clean_config[k] = [strip_null(vi) for vi in v]
+                    else:
+                        clean_config[k] = v
+            return clean_config
+        self.ontology.add_result_part(MalwareConfig, strip_null(config))
+
     def execute(self, request):
         result = Result()
         config_result = self.cx.run_parsers(request.file_path)
@@ -171,9 +186,14 @@ def execute(self, request):
                 id = f"{parser_framework}_{parser_name}"
                 classification = self.source_map[id]["classification"]
                 source_name = self.source_map[id]["source_name"]
-                config = parser_output.pop("config")
+                config = parser_output.pop("config")
+
+                # Correct revoked ATT&CK IDs
+                for i, v in enumerate(config.get('attack', [])):
+                    config['attack'][i] = attack_map.revoke_map.get(v, v)
+
+                self.attach_ontology(config)
 
                 parser_output["family"] = config.pop("family")
                 parser_output["Framework"] = parser_framework
diff --git a/configextractor_/maco_tags.py b/configextractor_/maco_tags.py
index 44477a0..df38f08 100644
--- a/configextractor_/maco_tags.py
+++ b/configextractor_/maco_tags.py
@@ -51,9 +51,9 @@ def extract_HTTP_tags(data: List[Dict]) -> Dict:
             tags.setdefault('network.port', []).append(d['port'])
 
         if d.get('uri'):
-            tags.setdefault('network.static.uri', []).extend(d['uri'])
+            tags.setdefault('network.static.uri', []).append(d['uri'])
         if d.get('path'):
-            tags.setdefault('network.static.uri_path', []).extend(d['path'])
+            tags.setdefault('network.static.uri_path', []).append(d['path'])
         if d.get('user_agent'):
             tags.setdefault('network.user_agent', []).append(d['user_agent'])

From 618cafbf5b61f1cd63446c590d21f3742e686295 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 18:46:39 +0000
Subject: [PATCH 22/23] update documentation

---
 README.md | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f554b69..a2b887d 100755
--- a/README.md
+++ b/README.md
@@ -1,4 +1,25 @@
 # ConfigExtractor Service
-**NOTE** : All malware parsers, yara rules etc used in this service are open-source and available in the [ConfigExtractor](https://github.com/CybercentreCanada/configextractor-py) Python library.
+This Assemblyline service extracts malware configurations (such as IP, URL and domain) for various malware families by leveraging the [ConfigExtractor Python library](https://github.com/CybercentreCanada/configextractor-py) for analysis.
 
-This Assemblyline service extracts malware configurations (such as IP, URL and domain) for various malware family by leveraging the ConfigExtractor Python library for analysis.
+
+## Updater
+
+### Sources
+The updater for this service expects source match patterns that select the directories containing parsers.
+
+For example, the CAPE source uses a match pattern of `.*/modules/processing/parsers/CAPE/$`, which targets only the parsers in that directory.
+
+### Persistence
+The updater assumes that you have attached a storage volume to store your collection of sources. Unlike other services' updaters, this one relies on that storage volume rather than Assemblyline's datastore to maintain persistence.
+
+### Python Packages
+The updater scans the parser directories for `requirements.txt` files and installs the listed Python packages into a directory that is passed on to service instances.
+
+## [ConfigExtractor Python Library](https://github.com/CybercentreCanada/configextractor-py)
+
+Any parser directory that works with this library should also be compatible with the service.
+
+At the time of writing, we officially support the following frameworks:
+ - [MWCP](https://github.com/dod-cyber-crime-center/DC3-MWCP)
+ - [CAPE w/ MACO output](https://github.com/kevoreilly/CAPEv2)
+ - [MACO](https://github.com/CybercentreCanada/Maco)

From d776d75d9a1132433e307293c24245f38ccce4e5 Mon Sep 17 00:00:00 2001
From: cccs-rs
Date: Tue, 23 Aug 2022 18:49:48 +0000
Subject: [PATCH 23/23] remove old tests

---
 pipelines/azure-build.yaml                    |   3 -
 pipelines/azure-tests.yaml                    |  71 ---
 test/__init__.py                              |   0
 test/requirements.txt                         |   3 -
 ...7d549fd8fec2a894dd15310053b0b8078064a5754b |   1 -
 test/test_configextractor.py                  | 475 ------------------
 6 files changed, 553 deletions(-)
 delete mode 100644 pipelines/azure-tests.yaml
 delete mode 100644 test/__init__.py
 delete mode 100644 test/requirements.txt
 delete mode 100644 test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
 delete mode 100644 test/test_configextractor.py

diff --git a/pipelines/azure-build.yaml b/pipelines/azure-build.yaml
index 0ad07be..cb44a9a 100644
--- a/pipelines/azure-build.yaml
+++ b/pipelines/azure-build.yaml
@@ -27,9 +27,6 @@ stages:
           if [[ "$TAG" == *stable* ]]; then export BUILD_TYPE=stable; else export BUILD_TYPE=latest; fi
           docker build --build-arg version=$TAG --build-arg branch=$BUILD_TYPE -t cccs/assemblyline-service-configextractor:$TAG -t cccs/assemblyline-service-configextractor:$BUILD_TYPE -f ./Dockerfile .
        displayName: Build containers
-      # - script: |
-      #     docker run -v `pwd`/test/:/opt/al_service/test/ cccs/assemblyline-service-configextractor:latest bash -c 'pip install -U -r test/requirements.txt; pytest'
-      #   displayName: Test containers
       - script: |
           docker push cccs/assemblyline-service-configextractor --all-tags
         displayName: Deploy to Docker Hub
diff --git a/pipelines/azure-tests.yaml b/pipelines/azure-tests.yaml
deleted file mode 100644
index 68ef7a8..0000000
--- a/pipelines/azure-tests.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: tests
-
-trigger: ["*"]
-pr: ["*"]
-
-pool:
-  vmImage: "ubuntu-20.04"
-
-jobs:
-  - job: run_test
-    strategy:
-      matrix:
-        python3_7:
-          python.version: "3.7"
-        Python3_8:
-          python.version: "3.8"
-
-    timeoutInMinutes: 10
-
-    steps:
-      - task: UsePythonVersion@0
-        displayName: Set python version
-        inputs:
-          versionSpec: "$(python.version)"
-      - script: |
-          install_path=`pwd`
-          echo "Setup YARA"
-          YARA_VERSION=4.1.0
-          sudo apt-get update
-          sudo apt-get install -y libfuzzy-dev git libssl1.1 libmagic1 libssl-dev libmagic-dev automake libtool make gcc wget git
-          sudo rm -rf /var/lib/apt/lists/*
-          wget -O /tmp/yara.tar.gz https://github.com/VirusTotal/yara/archive/v$YARA_VERSION.tar.gz
-          tar -zxf /tmp/yara.tar.gz -C /tmp
-          cd /tmp/yara-$YARA_VERSION
-          ./bootstrap.sh
-          ./configure --enable-cuckoo --enable-magic --enable-dotnet --with-crypto --prefix /tmp/yara_install
-          make
-          make install
-          sudo cp -r /tmp/yara_install /usr/local
-          cd $install_path
-
-          echo "Install Python packages"
-          sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir assemblyline assemblyline_v4_service magic-yara-python gitpython plyara pyparsing==2.3.0
-          sudo env "PATH=$PATH" python -m pip install -U -r `pwd`/test/requirements.txt
-
-          echo "Install ConfigExtractor"
-          git clone --recurse-submodules https://github.com/CybercentreCanada/configextractor-py.git /tmp/configextractor-py
-          sudo env "PATH=$PATH" python -m pip install -U --no-cache-dir /tmp/configextractor-py/RATDecoders/ /tmp/configextractor-py/
-          sudo mkdir /opt/al_service/
-
-          echo "Cloning CAPE parsers and patch in library"
-          git clone https://github.com/kevoreilly/CAPEv2.git /tmp/CAPEv2
-          sudo rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/*.py_disabled
-          sudo rm -f /tmp/CAPEv2/modules/processing/parsers/CAPE/test_cape.py
-
-          echo "Copying library and parsers only"
-          sudo mkdir -p /opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          sudo cp -r /tmp/CAPEv2/modules/processing/parsers/CAPE/* /opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          sudo mkdir -p /opt/al_service/CAPEv2/lib
-          sudo cp -r /tmp/CAPEv2/lib/* /opt/al_service/CAPEv2/lib/
-
-          sudo
-          CAPE_PARSERS_DIR=/opt/al_service/CAPEv2/modules/processing/parsers/CAPE/
-          PYTHONPATH=$PYTHONPATH:/opt/al_service/CAPEv2/
-          sudo mv /tmp/configextractor-py/dependencies /opt/al_service/dependencies
-          sudo rm -rf /tmp/* /var/lib/apt/lists/* ~/.cache/pip /tmp/configextractor-py/
-        displayName: Setup environment
-      - script: python -m pytest --durations=10 -rsx -vv --cov-report=xml --cov=configextractor
-        displayName: Test
-      - script: python -m codecov
-        displayName: Upload Coverage
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/test/requirements.txt b/test/requirements.txt
deleted file mode 100644
index 76e4fb4..0000000
--- a/test/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-pytest
-pytest-cov
-codecov
\ No newline at end of file
diff --git a/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b b/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
deleted file mode 100644
index e9ea42a..0000000
--- a/test/samples/c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b
+++ /dev/null
@@ -1 +0,0 @@
-this is a text file
diff --git a/test/test_configextractor.py b/test/test_configextractor.py
deleted file mode 100644
index fbf885d..0000000
--- a/test/test_configextractor.py
+++ /dev/null
@@ -1,475 +0,0 @@
-import os
-import json
-import pytest
-import shutil
-import yaml
-from configextractor import cli
-cli.ROOT_DIR = '/opt/al_service/dependencies'
-cli.init_root_dependencies()
-cli.load_parsers()
-
-# Getting absolute paths, names and regexes
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(TEST_DIR)
-SERVICE_CONFIG_NAME = "service_manifest.yml"
-SERVICE_CONFIG_PATH = os.path.join(ROOT_DIR, SERVICE_CONFIG_NAME)
-TEMP_SERVICE_CONFIG_PATH = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-
-# Samples that we will be sending to the service
-samples = [dict(
-    sid=1,
-    metadata={},
-    service_name='configextractor',
-    service_config={'use_cape': True},
-    fileinfo=dict(
-        magic='ASCII text, with no line terminators',
-        md5='fda4e701258ba56f465e3636e60d36ec',
-        mime='text/plain',
-        sha1='af2c2618032c679333bebf745e75f9088748d737',
-        sha256='c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b',
-        size=19,
-        type='unknown',
-    ),
-    filename='c805d89c6d26e6080994257d549fd8fec2a894dd15310053b0b8078064a5754b',
-    min_classification='TLP:WHITE',
-    max_files=501,  # TODO: get the actual value
-    ttl=3600,
-),
-]
-
-
-def create_tmp_manifest():
-    temp_service_config_path = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-    if not os.path.exists(temp_service_config_path):
-        # Placing the service_manifest.yml in the tmp directory
-        shutil.copyfile(SERVICE_CONFIG_PATH, temp_service_config_path)
-
-
-def remove_tmp_manifest():
-    temp_service_config_path = os.path.join("/tmp", SERVICE_CONFIG_NAME)
-    if os.path.exists(temp_service_config_path):
-        os.remove(temp_service_config_path)
-
-
-def return_result_section_class():
-    create_tmp_manifest()
-    from assemblyline_v4_service.common.result import ResultSection
-    remove_tmp_manifest()
-    return ResultSection
-
-
-@pytest.fixture
-def class_instance():
-    create_tmp_manifest()
-    try:
-        from configextractor_ import ConfigExtractor
-        yield ConfigExtractor()
-    finally:
-        remove_tmp_manifest()
-
-
-@pytest.fixture
-def parsers():
-    from assemblyline.odm.models.tagging import Tagging
-    correct_yara_externals = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()}
-    correct_yara_externals['al_file_rule_yara'] = ""
-    return cli.compile(correct_yara_externals)
-
-
-def get_section_builder_inputs() -> list:
-    possible_inputs_for_section_builder = []
-    parser_names = yaml.safe_load(open(cli.YARA_PARSER_PATH, 'r').read()).keys()
-    parser_types = ["MWCP", "RATDecoder"]
-    field_dict = {
-        "address": ['999'],
-        "other": {
-            "a": "b"
-        },
-        "not_in_field_map": True
-    }
-    for parser_name in parser_names:
-        for parser_type in parser_types:
-            possible_inputs_for_section_builder.append((parser_name, field_dict, parser_type))
-    return possible_inputs_for_section_builder
-
-
-def get_classification_checker_inputs() -> list:
-    ResultSection = return_result_section_class()
-
-    data_for_result_sections = get_section_builder_inputs()
-    possible_inputs_for_classification_checker = []
-    for parser_name, field_dict, parser_type in data_for_result_sections:
-        res_sec = ResultSection(f"{parser_type} : {parser_name}")
-        possible_inputs_for_classification_checker.append((res_sec, parser_name))
-    return possible_inputs_for_classification_checker
-
-
-def get_subsection_builder_inputs() -> list:
-    ResultSection = return_result_section_class()
-
-    parent_result_section = ResultSection("parent")
-    field_dict = {
-        "address": ["list_sample"],
-        "c2_address": [["nested_list_sample"]],
-        "c2_url": [["nested_list_sample"], "list_sample"]
-    }
-    possible_inputs_for_subsection_builder = [(parent_result_section, field_dict)]
-    return possible_inputs_for_subsection_builder
-
-
-def check_section_equality(this, that) -> bool:
-    # Recursive method to check equality of result section and nested sections
-
-    # Heuristics also need their own equality checks
-    if this.heuristic and that.heuristic:
-        result_heuristic_equality = this.heuristic.attack_ids == that.heuristic.attack_ids and \
-            this.heuristic.frequency == that.heuristic.frequency and \
-            this.heuristic.heur_id == that.heuristic.heur_id and \
-            this.heuristic.score == that.heuristic.score and \
-            this.heuristic.score_map == that.heuristic.score_map and \
-            this.heuristic.signatures == that.heuristic.signatures
-
-    elif not this.heuristic and not that.heuristic:
-        result_heuristic_equality = True
-    else:
-        result_heuristic_equality = False
-
-    # Assuming we are given the "root section" at all times, it is safe to say that we don't need to confirm parent
-    current_section_equality = result_heuristic_equality and \
-        this.body == that.body and \
-        this.body_format == that.body_format and \
-        this.classification == that.classification and \
-        this.depth == that.depth and \
-        len(this.subsections) == len(that.subsections) and \
-        this.title_text == that.title_text
-
-    if not current_section_equality:
-        return False
-
-    for index, subsection in enumerate(this.subsections):
-        subsection_equality = check_section_equality(subsection, that.subsections[index])
-        if not subsection_equality:
-            return False
-
-    return True
-
-
-def check_reporter_equality(this, that) -> bool:
-    # Checks all mwcp.Report attributes except for managed_tempdir
-    reporter_equality = this.errors == that.errors and this.finalized == that.finalized \
-        and this.input_file == that.input_file \
-        and {x: sorted(this.metadata[x]) for x in this.metadata.keys()} == that.metadata \
-        and this.parser == that.parser
-    if not reporter_equality:
-        return reporter_equality
-
-    # Also in the case where a metadata list exists, the order does not matter, so check as such
-    metadata_equality = this.metadata.keys() == that.metadata.keys()
-    if not metadata_equality:
-        return metadata_equality
-
-    for key, value in this.metadata.items():
-        if not metadata_equality:
-            return metadata_equality
-        if type(value) == list:
-            if len(value) != len(that.metadata[key]):
-                return False
-            for item in value:
-                if item not in that.metadata[key]:
-                    return False
-        else:
-            metadata_equality = value == that.metadata[key]
-
-    return reporter_equality and metadata_equality
-
-
-def create_correct_result_section_tree(fields, parsers=None, parser_type=None, parser_name=None):
-    from configextractor_ import FIELD_TAG_MAP, tag_network_ioc
-    from assemblyline_v4_service.common.result import BODY_FORMAT
-    from assemblyline.common import forge
-    cl_engine = forge.get_classification()
-    ResultSection = return_result_section_class()
-    other_key = "other"
-    ratdecoder = "RATDecoder"
-    mwcp = "MWCP"
-    malware_name = ''
-    malware_types = []
-    mitre_group = ''
-    mitre_att = ''
-    category = 'malware'
-    correct_file_parsers = {}
-
-    if parser_type not in [ratdecoder, mwcp] or not parser_name:
-        correct_parent_section = ResultSection("parent")
-    else:
-        [correct_file_parsers.update(p) for p in parsers]
-        correct_parent_section = ResultSection(f"{parser_type} : {parser_name}")
-
-    parser_attributes = {}
-    if parser_type == mwcp:
-        obj = correct_file_parsers[parser_name]
-        for item in ['classification', 'mitre_group', 'mitre_att',
-                     'malware', 'malware_types', 'category']:
-            val = getattr(obj, item, None)
-            if val:
-                parser_attributes[item] = val
-        malware_name = obj.malware
-        malware_types = obj.malware_types
-        mitre_att = obj.mitre_att
-        mitre_group = obj.mitre_group
-        category = obj.category
-    elif parser_type == ratdecoder:
-        malware_name = parser_name
-
-    if correct_file_parsers:
-        parser_classification = correct_file_parsers[parser_name].classification
-        correct_classification = cl_engine.normalize_classification(parser_classification)
-        correct_parent_section.classification = correct_classification
-
-    if fields and parser_type:
-        from configextractor_ import HEURISTICS_MAP
-        correct_parent_section.set_body(json.dumps(parser_attributes), body_format=BODY_FORMAT.KEY_VALUE)
-        correct_parent_section.set_heuristic(HEURISTICS_MAP.get(category, 1), attack_id=mitre_att)
-        correct_parent_section.add_tag("source", parser_type)
-        if malware_name:
-            correct_parent_section.add_tag('attribution.implant', malware_name.upper())
-        if mitre_group:
-            correct_parent_section.add_tag('attribution.actor', mitre_group.upper())
-        for malware_type in malware_types:
-            correct_parent_section.add_tag('attribution.family', malware_type.upper())
-
-        # subsection section
-        for key, value in fields.items():
-            if key in FIELD_TAG_MAP:
-                tag = FIELD_TAG_MAP[key]
-                body = []
-                for field in value:
-                    if type(field) is str:
-                        body.append({key: field})
-                    elif type(field) is list:
-                        body.extend([{key: item} for item in field])
-
-                correct_subsection = ResultSection(
-                    title_text=f"Extracted {key.capitalize()}",
-                    body=json.dumps(body),
-                    body_format=BODY_FORMAT.TABLE,
-                )
-                if 'uri' in tag:
-                    tag_network_ioc(correct_subsection, value)
-                else:
-                    for v in value:
-                        correct_subsection.add_tag(tag, value)
-                correct_parent_section.add_subsection(correct_subsection)
-
-        # Other key section comes after all subsection builder
-        if other_key in fields:
-            other_content = fields[other_key]
-            other_section = ResultSection(
-                title_text=f"Other metadata found",
-                body_format=BODY_FORMAT.KEY_VALUE,
-                body=json.dumps(other_content)
-            )
-            correct_parent_section.add_subsection(other_section)
-    return correct_parent_section
-
-
-def yield_sample_file_paths():
-    samples_path = os.path.join(TEST_DIR, "samples")
-    # For some reason os.listdir lists the same file twice, but with a trailing space on the second entry
-    paths = set([path.rstrip() for path in os.listdir(samples_path)])
-    for sample in paths:
-        yield os.path.join(samples_path, sample)
-
-
-class TestConfigExtractor:
-
-    @classmethod
-    def setup_class(cls):
-        # Placing the samples in the tmp directory
-        samples_path = os.path.join(TEST_DIR, "samples")
-        for sample in os.listdir(samples_path):
-            sample_path = os.path.join(samples_path, sample)
-            shutil.copyfile(sample_path, os.path.join("/tmp", sample))
-
-    @classmethod
-    def teardown_class(cls):
-        # Cleaning up the tmp directory
-        samples_path = os.path.join(TEST_DIR, "samples")
-        for sample in os.listdir(samples_path):
-            temp_sample_path = os.path.join("/tmp", sample)
-            os.remove(temp_sample_path)
-
-    @staticmethod
-    def test_init(class_instance):
-        from configextractor.cli import register
-        assert class_instance.file_parsers == {}
-        assert class_instance.tag_parsers is None
-        assert class_instance.parser_classification == []
-
-    @staticmethod
-    def test_start(class_instance, parsers):
-        correct_file_parsers, correct_tag_parsers = parsers
-        class_instance.start()
-        # Check if indeed the expected file and tag parsers are the actual file and tag parsers
-        assert class_instance.file_parsers == correct_file_parsers
-        assert class_instance.tag_parsers == correct_tag_parsers
-
-    @staticmethod
-    @pytest.mark.parametrize("sample",
-                             samples
-                             )
-    def test_execute(sample, class_instance):
-        # Imports required to execute the sample
-        from assemblyline_v4_service.common.task import Task
-        from assemblyline.odm.messages.task import Task as ServiceTask
-        from assemblyline_v4_service.common.request import ServiceRequest
-
-        # Creating the required objects for execution
-        service_task = ServiceTask(sample)
-        task = Task(service_task)
-        class_instance._task = task
-        service_request = ServiceRequest(task)
-
-        # Actually executing the sample
-        # task.service_config = {}
-        class_instance.execute(service_request)
-
-    @staticmethod
-    @pytest.mark.parametrize("parser,field_dict,parsertype",
-                             get_section_builder_inputs()
-                             )
-    def test_section_builder(parser, field_dict, parsertype, class_instance, parsers):
-        from assemblyline_v4_service.common.result import Result
-        result = Result()
-        correct_parsers = parsers[0] if parser in parsers[0].keys() else parsers[1]
-        correct_sections = create_correct_result_section_tree(field_dict, parsers, parsertype, parser)
-        class_instance.file_parsers = correct_parsers
-        class_instance.section_builder(parser=parser, field_dict=field_dict, result=result, parsertype=parsertype)
-
-        assert check_section_equality(result.sections[0], correct_sections)
-
-    @staticmethod
-    @pytest.mark.parametrize("res_section,parser_name",
-                             get_classification_checker_inputs()
-                             )
-    def test_classification_checker(res_section, parser_name, parsers):
-        from configextractor_ import classification_checker
-        from assemblyline.common import forge
-        cl_engine = forge.get_classification()
-
-        correct_file_parsers = parsers[0] if parser_name in parsers[0].keys() else parsers[1]
-        parser_classification = correct_file_parsers[parser_name].classification
-        correct_classification = cl_engine.normalize_classification(parser_classification)
-
-        # TODO: Note that classification_checker() only needs the parser classification for the passed parser_name,
-        # not all parsers
-        test_res_section = classification_checker(res_section=res_section,
-                                                  parser_name=parser_name, file_parsers=correct_file_parsers)
-        assert test_res_section.classification == correct_classification
-
-    @staticmethod
-    @pytest.mark.parametrize("parent_section,fields",
-                             get_subsection_builder_inputs()
-                             )
-    def test_subsection_builder(parent_section, fields):
-        from configextractor_ import subsection_builder
-        correct_parent_section = create_correct_result_section_tree(fields)
-        subsection_builder(parent_section=parent_section, fields=fields)
-        assert check_section_equality(parent_section, correct_parent_section)
-
-
-def get_parser_entries():
-    import yaml
-    stream = open(cli.YARA_PARSER_PATH, 'r')
-    parser_entries = yaml.full_load(stream)
-    return parser_entries
-
-
-def get_validate_parser_inputs():
-    possible_inputs_for_validate_parser = []
-    parser_entries = get_parser_entries()
-    incorrect_key = "incorrect"
-
-    for parser_entry in parser_entries.values():
-        possible_inputs_for_validate_parser.append(parser_entry["parser"])
-    possible_inputs_for_validate_parser.append([{incorrect_key: [incorrect_key]}])
-    return possible_inputs_for_validate_parser
-
-
-def get_report():
-    import mwcp
-    mwcp.register_entry_points()
-    mwcp.register_parser_directory(cli.MWCP_PARSERS_DIR_PATH)
-    reporter = mwcp.Report()
-    return reporter
-
-
-def add_metadata(data, mwcp_key, correct_report=None):
-    if not correct_report:
-        correct_report = get_report()
-    for val in data.values():
-        correct_report.add_metadata(mwcp_key, val)
-    return correct_report
-
-
-def create_correct_parser_objs(tags=None):
-    import yara
-    from configextractor.cli import check_paths, validate_parsers, Parser
-
-    parser_entries = get_parser_entries()
-    parser_objs = {}
-    for parser_name, parser_details in parser_entries.items():
-        rule_source_paths = []
-        # if tags are present then get tag rule paths
-
-        if tags and 'tag' in parser_details['selector']:
-            rule_source_paths = parser_details['selector']['tag']
-        elif not tags and 'yara_rule' in parser_details['selector']:
-            rule_source_paths = parser_details['selector']['yara_rule']
-        if not check_paths(rule_source_paths):
-            continue
-        validated_parsers = validate_parsers(parser_details['parser'])
-        compiled_rules = []
-        for rule_source_path in rule_source_paths:
-            abs_path = os.path.join(ROOT_DIR, rule_source_path)
-            if tags:
-                rule = yara.compile(filepath=abs_path, externals=tags)
-            else:
-                rule = yara.compile(filepath=abs_path)
-            compiled_rules.append(rule)
-        parser_objs[parser_name] = Parser(
-            name=parser_name,
-            parser_list=validated_parsers,
-            compiled_rules=compiled_rules,
-            classification=parser_details['classification'],
-            malware=parser_details['malware'],
-            malware_types=parser_details['malware_type'],
-            mitre_group=parser_details['mitre_group'],
-            mitre_att=parser_details['mitre_att'],
-            category=parser_details['category'],
-            run_on=parser_details['run_on']
-        )
    return parser_objs
-
-
-def get_tags():
-    from assemblyline.odm.models.tagging import Tagging
-    tags = {f'al_{x.replace(".", "_")}': "" for x in Tagging.flat_fields().keys()}
-    tags["al_file_rule_yara"] = ""
-    return tags
-
-
-def get_new_tags():
-    request_task_tags = {"a": "b"}
-
-    tags = {f"al_{k.replace('.', '_')}": i for k, i in request_task_tags.items()}
-    newtags = {}
-    # yara externals must be dicts w key value pairs being strings
-    for k, v in tags.items():
-        key = f"al_{k.replace('.', '_')}"
-        for i in range(len(v)):
-            if not isinstance(v[i], str):
-                v[i] = str(v[i])
-        value = " | ".join(v)
-        newtags[key] = value
-    return newtags
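
A note on the MACO config shape that the patches above manipulate (popping "family", "version", "attack", and the per-protocol network lists): the sketch below is illustrative only. It assumes the maco library's pydantic ExtractorModel exposes default-empty network lists and pydantic's .dict(), which is how the service code above uses it; the family name and URI are invented.

from maco.model import ExtractorModel

# Build the kind of config a MACO extractor returns for one sample.
config = ExtractorModel(family="example_family")  # hypothetical family name
config.http.append(
    ExtractorModel.Http(uri="http://c2.example.com/gate.php", usage="c2")
)

# execute() consumes the dict form: "family" feeds the attribution tags, and
# entries under "http" (grouped by their "usage") drive network_ioc_section().
print(config.dict(exclude_none=True))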
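
In the same spirit, a quick sanity check of the maco_tags helpers introduced in PATCH 18, using an invented FTP connection in the dict form that network_ioc_section() passes in:

from configextractor_.maco_tags import extract_FTP_tags

connections = [{"hostname": "ftp.example.com", "port": 2121, "password": "hunter2"}]
print(extract_FTP_tags(connections))
# {'network.protocol': ['FTP'], 'info.password': ['hunter2'],
#  'network.static.domain': ['ftp.example.com'], 'network.port': [2121]}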
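
Lastly, the strip_null helper from PATCH 21 prunes falsy values (recursing into dicts and into lists of dicts) before the config is attached as a MalwareConfig ontology record. A standalone worked example, with invented values:

def strip_null(d: dict) -> dict:
    # Same logic as attach_ontology()'s inner helper: drop falsy values,
    # recursing into nested dicts and lists of dicts.
    clean_config = {}
    for k, v in d.items():
        if v:
            if isinstance(v, dict):
                clean_config[k] = strip_null(v)
            elif isinstance(v, list) and isinstance(v[0], dict):
                clean_config[k] = [strip_null(vi) for vi in v]
            else:
                clean_config[k] = v
    return clean_config

config = {
    "family": "example",
    "version": None,  # dropped: falsy
    "attack": [],     # dropped: an empty list is falsy
    "http": [{"uri": "http://c2.example.com", "port": None}],
}
print(strip_null(config))
# {'family': 'example', 'http': [{'uri': 'http://c2.example.com'}]}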