From dc3615da17cfe226861da0eb06f49cf6c5b52b1c Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 16 Jun 2021 21:13:07 +0200 Subject: [PATCH] harm: move dga domain classification to other move "dga domain" to the taxonomy other, as it is not accepted in RSIT rename to "dga-domain" to match the name scheme of other types adapt bots depending on the name add changelog and news entries, including SQL update statements --- CHANGELOG.md | 5 +++-- NEWS.md | 8 +++++++- docs/dev/data-format.rst | 4 ++-- intelmq/bots/experts/idea/expert.py | 4 ++-- intelmq/bots/experts/taxonomy/expert.py | 2 +- intelmq/bots/parsers/bambenek/parser.py | 2 +- intelmq/lib/harmonization.py | 4 +++- intelmq/tests/bots/parsers/bambenek/test_parser.py | 2 +- intelmq/tests/lib/test_harmonization.py | 2 ++ 9 files changed, 22 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 881f09cf7..12dfadce0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ CHANGELOG ### Data Format The IntelMQ Data Harmonization ("DHO") is renamed to IntelMQ Data Format ("IDF"). Internal files remain and work the same as before (PR#1818 by Sebastian Waldbauer, fixes 1810). -Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old namings are still supported until at least version 3.0. +Update allowed classification fields to 2020-01-28 version (#1409, #1476). - The taxonomy `abusive content` has been renamed to `abusive-content`. - The taxonomy `information content security` has been renamed to `information-content-security`. - The validation of type `unauthorised-information-access` has been fixed, a bug prevented the use of it. @@ -48,8 +48,9 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n - The taxonomy `information gathering` has been rename to `information-gathering`. - The taxonomy `malicious code` has been renamed to `malicious-code`. - The type `c2server` has been renamed to `c2-server`. 
- - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively. + - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively (PR#1917 by Sebastian Wagner addresses #1409). - The type `ransomware` has been integrated into `infected-system`. + - The type `dga domain` has been moved to the taxonomy `other` and renamed to `dga-domain` (PR#1992 by Sebastian Wagner fixes #1613). - For the taxonomy 'availability', the type `misconfiguration` is new. - For the taxonomy 'other', the type `unknown` has been renamed to `undetermined`. - For the taxonomy 'vulnerable': diff --git a/NEWS.md b/NEWS.md index 992d12415..398872143 100644 --- a/NEWS.md +++ b/NEWS.md @@ -51,6 +51,7 @@ The classification scheme has been updated to better match the [Reference Securi | malicious code | | malicious-code | | | malicious code | c2server | malicious-code | c2-server | | malicious code | malware | malicious-code | infected-system / malware-distribution | +| malicious code | dga domain | other | dga-domain | | malicious code | malware | other | malware | | malicious code | ransomware | malicious-code | infected-system | | vulnerable | vulnerable client | vulnerable | vulnerable-system | @@ -60,7 +61,9 @@ The classification scheme has been updated to better match the [Reference Securi - For the taxonomy 'availability', the type `misconfiguration` is new. - For the taxonomy 'other', the types `malware` and `undetermined` are new. -The old names can still be used in code, and they are automatically converted to the new names. +The old `classification.type` names can still be used in code, and they are automatically converted to the new names. +Existing data in databases and the like is *not* changed automatically. +See the section "Postgres databases" below for instructions to update existing data in databases. 
#### "Malware" @@ -109,6 +112,9 @@ UPDATE events UPDATE events SET "classification.type" = 'undetermined' WHERE "classification.taxonomy" = 'other' AND "classification.type" = 'unknown'; +UPDATE events + SET "classification.taxonomy" = 'other', "classification.type" = 'dga-domain' + WHERE "classification.taxonomy" = 'malicious-code' AND "classification.type" = 'dga domain'; ``` Depending on the data (e.g. feed), the correct statement for the `malware` type deprecation may be either this: ```sql diff --git a/docs/dev/data-format.rst b/docs/dev/data-format.rst index 408e6aa36..8cb4b6cc3 100644 --- a/docs/dev/data-format.rst +++ b/docs/dev/data-format.rst @@ -137,11 +137,11 @@ The taxonomy can be automatically added by the taxonomy expert bot based on the intrusions unauthorized-login A possibly infected device logged in to a remote device without authorization. Not in ENISA eCSIRT-II taxonomy. intrusions unprivileged-account-compromise Compromise of a system using an unprivileged (user/service) account. malicious-code c2-server This is a command and control server in charge of a given number of botnet drones. - malicious-code dga domain DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in ENISA eCSIRT-II taxonomy. malicious-code infected-system This is a compromised machine, which has been observed to make a connection to a command and control server. malicious-code malware-configuration This is a resource which updates botnet drones with a new configuration. malicious-code malware-distribution URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. 
The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. Not in RSIT. + other dga-domain DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in RSIT. other other All incidents which don't fit in one of the given categories should be put into this class. other malware An IoC referring to a malware (sample) itself. Not in RSIT. other proxy This refers to the use of proxies from inside your network. Not in RSIT. @@ -170,7 +170,7 @@ Meaning of source, destination and local values for each classification type and compromised *server* ddos *attacker* target defacement *defaced website* - dga domain *infected device* + dga-domain *infected device* dropzone *server hosting stolen data* exploit *hosting server* ids-alert *triggering device* diff --git a/intelmq/bots/experts/idea/expert.py b/intelmq/bots/experts/idea/expert.py index 1b1823905..c4bcb597f 100644 --- a/intelmq/bots/experts/idea/expert.py +++ b/intelmq/bots/experts/idea/expert.py @@ -46,7 +46,7 @@ class IdeaExpertBot(Bot): "compromised": "Intrusion.AdminCompromise", "backdoor": "Intrusion.AdminCompromise", "blacklist": "Other", - "dga domain": "Anomaly.Behaviour", + "dga-domain": "Anomaly.Behaviour", "proxy": "Vulnerable.Config", "data-leak": "Information", "tor": "Other", @@ -90,7 +90,7 @@ class IdeaExpertBot(Bot): "dropzone": "Dropzone", "malware-configuration": "MalwareConf", "c2-server": "CC", - "dga domain": "DGA", + "dga-domain": "DGA", "proxy": "Proxy", "tor": "Tor", "malware-distribution": "Malware" diff --git a/intelmq/bots/experts/taxonomy/expert.py b/intelmq/bots/experts/taxonomy/expert.py index e1c53399a..ad9da13cb 100644 --- a/intelmq/bots/experts/taxonomy/expert.py +++ 
b/intelmq/bots/experts/taxonomy/expert.py @@ -50,7 +50,7 @@ "unauthorized-login": "intrusions", # not in ENISA eCSIRT-II taxonomy "unprivileged-account-compromise": "intrusions", "c2-server": "malicious-code", - "dga domain": "malicious-code", # not in ENISA eCSIRT-II taxonomy + "dga-domain": "other", # intentionally not in RSIT, see #1409, #1613 and https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force/pull/32 "infected-system": "malicious-code", "malware-configuration": "malicious-code", "malware-distribution": "malicious-code", diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 4f6a01dc6..cfc277948 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -65,7 +65,7 @@ def parse_line(self, line, report): elif report['feed.url'] in BambenekParserBot.DGA_FEED: event.add('source.fqdn', value[0]) event.add('time.source', value[2] + ' 00:00 UTC') - event.add('classification.type', 'dga domain') + event.add('classification.type', 'dga-domain') else: raise ValueError('Unknown data feed %s.' 
% report['feed.url']) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index c120c9c6f..8aed95b40 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -282,7 +282,7 @@ class ClassificationType(String): 'ddos', 'ddos-amplifier', 'defacement', - 'dga domain', + 'dga-domain', 'dos', 'dropzone', 'exploit', @@ -371,6 +371,8 @@ def sanitize(value: str) -> Optional[str]: value = 'infected-system' elif value == 'unknown': value = 'undetermined' + elif value == 'dga domain': + value = 'dga-domain' return GenericType().sanitize(value) diff --git a/intelmq/tests/bots/parsers/bambenek/test_parser.py b/intelmq/tests/bots/parsers/bambenek/test_parser.py index a99cd694d..278c16a21 100644 --- a/intelmq/tests/bots/parsers/bambenek/test_parser.py +++ b/intelmq/tests/bots/parsers/bambenek/test_parser.py @@ -77,7 +77,7 @@ 'raw': 'eHFtY2xudXNhc3d2b2YuY29tLERvbWFpbiB1c2VkIGJ5IENyeXB0b2xvY2tlciAtIEZsYXNoYmFjayBER0EgZm9yIDEwIE5vdiAyMDE2LDIwMTYtMTEtMTAsaHR0cDovL29zaW50LmJhbWJlbmVrY29uc3VsdGluZy5jb20vbWFudWFsL2NsLnR4dA==', 'time.source': '2016-11-10T00:00:00+00:00', 'source.fqdn': 'xqmclnusaswvof.com', - 'classification.type': 'dga domain', + 'classification.type': 'dga-domain', 'malware.name': 'cryptolocker', 'event_description.text': 'Domain used by Cryptolocker - Flashback DGA for 10 Nov 2016', 'event_description.url': 'http://osint.bambenekconsulting.com/manual/cl.txt' diff --git a/intelmq/tests/lib/test_harmonization.py b/intelmq/tests/lib/test_harmonization.py index 909038627..9d8b59d0d 100644 --- a/intelmq/tests/lib/test_harmonization.py +++ b/intelmq/tests/lib/test_harmonization.py @@ -514,6 +514,8 @@ def test_classification_type_sanitize(self): sanitize=True)) self.assertTrue(harmonization.ClassificationType.is_valid('infected system ', sanitize=True)) + self.assertEqual(harmonization.ClassificationType.sanitize('dga domain'), + 'dga-domain') def test_classification_type_sanitize_invalid(self): """ Test ClassificationType.is_valid 
with invalid arguments. """