diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index 1984f60434..d366755da5 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -38,6 +38,7 @@ Thanks, you're awesome :-) --> #### Added * Introduced `--strict` flag to perform stricter schema validation when running the generator script. #937 +* Added check under `--strict` that ensures composite types in example fields are quoted. #966 #### Improvements diff --git a/USAGE.md b/USAGE.md index ffa2ca2e06..e70da6b14f 100644 --- a/USAGE.md +++ b/USAGE.md @@ -308,6 +308,7 @@ $ python/generator.py --strict Strict mode requires the following conditions, else the script exits on an exception: * Short descriptions must be less than or equal to 120 characters. +* Example values containing arrays or objects must be quoted to avoid unexpected YAML interpretation when the schema files or artifacts are relied on downstream. The current artifacts generated and published in the ECS repo will always be created using strict mode. However, older ECS versions (pre `v1.5.0`) will cause an exception if attempting to generate them using `--strict`. This is due to schema validation checks introduced after that version was released. diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc index da97a3c5f8..2f06d7194d 100644 --- a/docs/field-details.asciidoc +++ b/docs/field-details.asciidoc @@ -1211,7 +1211,7 @@ Note: this field should contain an array of values. -example: `['RD', 'RA']` +example: `["RD", "RA"]` | extended @@ -1343,7 +1343,7 @@ Note: this field should contain an array of values. -example: `['10.10.10.10', '10.10.10.11']` +example: `["10.10.10.10", "10.10.10.11"]` | extended @@ -4205,7 +4205,7 @@ Note: this field should contain an array of values. -example: `['/usr/bin/ssh', '-l', 'user', '10.0.0.16']` +example: `["/usr/bin/ssh", "-l", "user", "10.0.0.16"]` | extended @@ -4718,7 +4718,7 @@ Note: this field should contain an array of values. -example: `['Star-Lord']` +example: `["Star-Lord"]` | extended @@ -5624,7 +5624,7 @@ Note: this field should contain an array of values. -example: `['MII...', 'MII...']` +example: `["MII...", "MII..."]` | extended @@ -5757,7 +5757,7 @@ Note: this field should contain an array of values. -example: `['TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384', 'TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384', '...']` +example: `["TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", "..."]` | extended @@ -5838,7 +5838,7 @@ Note: this field should contain an array of values. -example: `['MII...', 'MII...']` +example: `["MII...", "MII..."]` | extended diff --git a/generated/beats/fields.ecs.yml b/generated/beats/fields.ecs.yml index c632daaead..573abe8499 100644 --- a/generated/beats/fields.ecs.yml +++ b/generated/beats/fields.ecs.yml @@ -1013,9 +1013,7 @@ description: 'Array of 2 letter DNS header flags. Expected values are: AA, TC, RD, RA, AD, CD, DO.' - example: - - RD - - RA + example: '["RD", "RA"]' - name: id level: extended type: keyword @@ -1096,9 +1094,7 @@ formats it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for.' - example: - - 10.10.10.10 - - 10.10.10.11 + example: '["10.10.10.10", "10.10.10.11"]' - name: response_code level: extended type: keyword @@ -3229,11 +3225,7 @@ the executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' - name: args_count level: extended type: long @@ -3376,11 +3368,7 @@ the executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' default_field: false - name: parent.args_count level: extended @@ -3884,8 +3872,7 @@ ignore_above: 1024 description: Name, organization, or pseudonym of the author or authors who created the rule used to generate this event. - example: - - Star-Lord + example: '["Star-Lord"]' default_field: false - name: category level: extended @@ -4652,9 +4639,7 @@ description: Array of PEM-encoded certificates that make up the certificate chain offered by the client. This is usually mutually-exclusive of `client.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' default_field: false - name: client.hash.md5 level: extended @@ -4735,10 +4720,8 @@ type: keyword ignore_above: 1024 description: Array of ciphers offered by the client during the client hello. - example: - - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - - '...' + example: '["TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", + "..."]' default_field: false - name: client.x509.alternative_names level: extended @@ -4955,9 +4938,7 @@ description: Array of PEM-encoded certificates that make up the certificate chain offered by the server. This is usually mutually-exclusive of `server.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' default_field: false - name: server.hash.md5 level: extended diff --git a/generated/ecs/ecs_flat.yml b/generated/ecs/ecs_flat.yml index c27228d794..43a72942f3 100644 --- a/generated/ecs/ecs_flat.yml +++ b/generated/ecs/ecs_flat.yml @@ -1384,9 +1384,7 @@ dns.header_flags: description: 'Array of 2 letter DNS header flags. Expected values are: AA, TC, RD, RA, AD, CD, DO.' - example: - - RD - - RA + example: '["RD", "RA"]' flat_name: dns.header_flags ignore_above: 1024 level: extended @@ -1514,9 +1512,7 @@ dns.resolved_ip: it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for.' - example: - - 10.10.10.10 - - 10.10.10.11 + example: '["10.10.10.10", "10.10.10.11"]' flat_name: dns.resolved_ip level: extended name: resolved_ip @@ -4777,11 +4773,7 @@ process.args: executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' flat_name: process.args ignore_above: 1024 level: extended @@ -5007,11 +4999,7 @@ process.parent.args: executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' flat_name: process.parent.args ignore_above: 1024 level: extended @@ -5778,8 +5766,7 @@ rule.author: dashed_name: rule-author description: Name, organization, or pseudonym of the author or authors who created the rule used to generate this event. - example: - - Star-Lord + example: '["Star-Lord"]' flat_name: rule.author ignore_above: 1024 level: extended @@ -6998,9 +6985,7 @@ tls.client.certificate_chain: description: Array of PEM-encoded certificates that make up the certificate chain offered by the client. This is usually mutually-exclusive of `client.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' flat_name: tls.client.certificate_chain ignore_above: 1024 level: extended @@ -7126,10 +7111,8 @@ tls.client.subject: tls.client.supported_ciphers: dashed_name: tls-client-supported-ciphers description: Array of ciphers offered by the client during the client hello. - example: - - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - - '...' + example: '["TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", + "..."]' flat_name: tls.client.supported_ciphers ignore_above: 1024 level: extended @@ -7508,9 +7491,7 @@ tls.server.certificate_chain: description: Array of PEM-encoded certificates that make up the certificate chain offered by the server. This is usually mutually-exclusive of `server.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' flat_name: tls.server.certificate_chain ignore_above: 1024 level: extended diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml index 490b00eb74..c56839e37b 100644 --- a/generated/ecs/ecs_nested.yml +++ b/generated/ecs/ecs_nested.yml @@ -1735,9 +1735,7 @@ dns: description: 'Array of 2 letter DNS header flags. Expected values are: AA, TC, RD, RA, AD, CD, DO.' - example: - - RD - - RA + example: '["RD", "RA"]' flat_name: dns.header_flags ignore_above: 1024 level: extended @@ -1866,9 +1864,7 @@ dns: formats it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for.' - example: - - 10.10.10.10 - - 10.10.10.11 + example: '["10.10.10.10", "10.10.10.11"]' flat_name: dns.resolved_ip level: extended name: resolved_ip @@ -5824,11 +5820,7 @@ process: the executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' flat_name: process.args ignore_above: 1024 level: extended @@ -6054,11 +6046,7 @@ process: the executable. May be filtered to protect sensitive information.' - example: - - /usr/bin/ssh - - -l - - user - - 10.0.0.16 + example: '["/usr/bin/ssh", "-l", "user", "10.0.0.16"]' flat_name: process.parent.args ignore_above: 1024 level: extended @@ -6890,8 +6878,7 @@ rule: dashed_name: rule-author description: Name, organization, or pseudonym of the author or authors who created the rule used to generate this event. - example: - - Star-Lord + example: '["Star-Lord"]' flat_name: rule.author ignore_above: 1024 level: extended @@ -8193,9 +8180,7 @@ tls: description: Array of PEM-encoded certificates that make up the certificate chain offered by the client. This is usually mutually-exclusive of `client.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' flat_name: tls.client.certificate_chain ignore_above: 1024 level: extended @@ -8324,10 +8309,8 @@ tls: tls.client.supported_ciphers: dashed_name: tls-client-supported-ciphers description: Array of ciphers offered by the client during the client hello. - example: - - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - - '...' + example: '["TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", + "..."]' flat_name: tls.client.supported_ciphers ignore_above: 1024 level: extended @@ -8706,9 +8689,7 @@ tls: description: Array of PEM-encoded certificates that make up the certificate chain offered by the server. This is usually mutually-exclusive of `server.certificate` since that value should be the first certificate in the chain. - example: - - MII... - - MII... + example: '["MII...", "MII..."]' flat_name: tls.server.certificate_chain ignore_above: 1024 level: extended diff --git a/schemas/README.md b/schemas/README.md index 9d1ac97696..c87be195a3 100644 --- a/schemas/README.md +++ b/schemas/README.md @@ -125,7 +125,9 @@ Supported keys to describe fields Defaults to the main description when absent. If the main description has multiple paragraphs, then a 'short' description with no newlines is required. -- example (optional): A single value example of what can be expected in this field +- example (optional): A single value example of what can be expected in this field. + Example values that are composite types (array, object) should be quoted to avoid YAML interpretation + in ECS-generated artifacts and other downstream projects depending on the schema. - multi\_fields (optional): Specify additional ways to index the field. - index (optional): If `False`, means field is not indexed (overrides type) - format: Field format that can be used in a Kibana index template. diff --git a/schemas/dns.yml b/schemas/dns.yml index 0c396a4a0f..afe11a190a 100644 --- a/schemas/dns.yml +++ b/schemas/dns.yml @@ -54,7 +54,7 @@ Array of 2 letter DNS header flags. Expected values are: AA, TC, RD, RA, AD, CD, DO. - example: [RD, RA] + example: "[\"RD\", \"RA\"]" normalize: - array @@ -205,6 +205,6 @@ data formats it can contain. Extracting all IP addresses seen in there to `dns.resolved_ip` makes it possible to index them as IP addresses, and makes them easier to visualize and query for. - example: [10.10.10.10, 10.10.10.11] + example: '["10.10.10.10", "10.10.10.11"]' normalize: - array diff --git a/schemas/process.yml b/schemas/process.yml index b8f1f4b11e..13ec63c07f 100644 --- a/schemas/process.yml +++ b/schemas/process.yml @@ -92,7 +92,7 @@ Array of process arguments, starting with the absolute path to the executable. May be filtered to protect sensitive information. - example: ["/usr/bin/ssh", "-l", "user", "10.0.0.16"] + example: "[\"/usr/bin/ssh\", \"-l\", \"user\", \"10.0.0.16\"]" normalize: - array diff --git a/schemas/rule.yml b/schemas/rule.yml index a9f6966705..c0daf79892 100644 --- a/schemas/rule.yml +++ b/schemas/rule.yml @@ -88,7 +88,7 @@ description: > Name, organization, or pseudonym of the author or authors who created the rule used to generate this event. - example: ['Star-Lord'] + example: "[\"Star-Lord\"]" normalize: - array diff --git a/schemas/tls.yml b/schemas/tls.yml index 569f09d54a..3ecacb041a 100644 --- a/schemas/tls.yml +++ b/schemas/tls.yml @@ -73,7 +73,7 @@ type: keyword level: extended description: Array of ciphers offered by the client during the client hello. - example: ["TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", "..."] + example: "[\"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384\", \"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384\", \"...\"]" normalize: - array @@ -109,7 +109,7 @@ Array of PEM-encoded certificates that make up the certificate chain offered by the client. This is usually mutually-exclusive of `client.certificate` since that value should be the first certificate in the chain. - example: ["MII...", "MII..."] + example: "[\"MII...\", \"MII...\"]" normalize: - array @@ -188,7 +188,7 @@ Array of PEM-encoded certificates that make up the certificate chain offered by the server. This is usually mutually-exclusive of `server.certificate` since that value should be the first certificate in the chain. - example: ["MII...", "MII..."] + example: "[\"MII...\", \"MII...\"]" normalize: - array diff --git a/scripts/schema/cleaner.py b/scripts/schema/cleaner.py index 5f62b2daac..5f15d459fe 100644 --- a/scripts/schema/cleaner.py +++ b/scripts/schema/cleaner.py @@ -169,6 +169,7 @@ def field_assertions_and_warnings(field): if not ecs_helpers.is_intermediate(field): # check short description length if in strict mode single_line_short_description(field, strict=strict_mode) + check_example_value(field, strict=strict_mode) if field['field_details']['level'] not in ACCEPTABLE_FIELD_LEVELS: msg = "Invalid level for field '{}'.\nValue: {}\nAcceptable values: {}".format( field['field_details']['name'], field['field_details']['level'], @@ -193,3 +194,18 @@ def single_line_short_description(schema_or_field, strict=True): raise ValueError(msg) else: ecs_helpers.strict_warning(msg) + + +def check_example_value(field, strict=True): + """ + Checks if value of the example field is of type list or dict. + Fails or warns (depending on strict mode) if so. + """ + example_value = field['field_details'].get('example', None) + if isinstance(example_value, (list, dict)): + name = field['field_details']['name'] + msg = f"Example value for field `{name}` contains an object or array which must be quoted to avoid YAML interpretation." + if strict: + raise ValueError(msg) + else: + ecs_helpers.strict_warning(msg) diff --git a/scripts/tests/unit/test_schema_cleaner.py b/scripts/tests/unit/test_schema_cleaner.py index 4c20fac01f..8298a32bb3 100644 --- a/scripts/tests/unit/test_schema_cleaner.py +++ b/scripts/tests/unit/test_schema_cleaner.py @@ -282,6 +282,64 @@ def test_multiline_short_description_warns_strict_disabled(self): except Exception: self.fail("cleaner.single_line_short_description() raised Exception unexpectedly.") + def test_field_example_value_is_object_raises(self): + field = { + 'field_details': { + 'name': 'test', + 'example': { + 'a': 'bob', + 'b': 'alice' + } + } + } + with self.assertRaisesRegex(ValueError, 'contains an object or array'): + cleaner.check_example_value(field) + + def test_field_example_value_is_array_raises(self): + field = { + 'field_details': { + 'name': 'test', + 'example': [ + 'bob', + 'alice' + ] + } + } + with self.assertRaisesRegex(ValueError, 'contains an object or array'): + cleaner.check_example_value(field) + + def test_example_field_value_is_object_warns_strict_disabled(self): + field = { + 'field_details': { + 'name': 'test', + 'example': { + 'a': 'bob', + 'b': 'alice' + } + } + } + try: + with self.assertWarnsRegex(UserWarning, 'contains an object or array'): + cleaner.check_example_value(field, strict=False) + except Exception: + self.fail("cleaner.check_example_value() raised Exception unexpectedly.") + + def test_example_field_value_is_array_warns_strict_disabled(self): + field = { + 'field_details': { + 'name': 'test', + 'example': [ + 'bob', + 'alice' + ] + } + } + try: + with self.assertWarnsRegex(UserWarning, 'contains an object or array'): + cleaner.check_example_value(field, strict=False) + except Exception: + self.fail("cleaner.check_example_value() raised Exception unexpectedly.") + def test_clean(self): '''A high level sanity test''' fields = self.schema_process()