From c36e111327f8770382fd30fcb30208c106cd0c90 Mon Sep 17 00:00:00 2001 From: Mathieu Martin Date: Fri, 2 Oct 2020 13:35:25 -0400 Subject: [PATCH] Add --oss flag to the ECS generator script (#991) --- CHANGELOG.next.md | 1 + USAGE.md | 27 ++++++++++++++++++++- scripts/generator.py | 14 +++++++---- scripts/schema/oss.py | 29 ++++++++++++++++++++++ scripts/tests/unit/test_schema_oss.py | 35 +++++++++++++++++++++++++++ 5 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 scripts/schema/oss.py create mode 100644 scripts/tests/unit/test_schema_oss.py diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index 6d9738be17..afd179c547 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -43,6 +43,7 @@ Thanks, you're awesome :-) --> * Introduced `--strict` flag to perform stricter schema validation when running the generator script. #937 * Added check under `--strict` that ensures composite types in example fields are quoted. #966 * Added `ignore_above` and `normalizer` support for keyword multi-fields. #971 +* Added `--oss` flag for users who want to generate ECS templates for use on OSS clusters. #991 #### Improvements diff --git a/USAGE.md b/USAGE.md index e70da6b14f..cb0c49bf27 100644 --- a/USAGE.md +++ b/USAGE.md @@ -29,6 +29,7 @@ relevant artifacts for their unique set of data sources. + [Subset](#subset) + [Ref](#ref) + [Mapping & Template Settings](#mapping--template-settings) + + [OSS](#oss) + [Strict Mode](#strict-mode) + [Intermediate-Only](#intermediate-only) @@ -295,6 +296,30 @@ The `--template-settings` argument defines [index level settings](https://www.el For `template.json`, the `mappings` object is left empty: `{}`. Likewise the `properties` object remains empty in the `mapping.json` example. This will be filled in automatically by the script. +#### OSS + +**IMPORTANT**: This feature is unnecessary for most users. Our default free distribution +comes with the Elastic Basic license, and supports all data types used by ECS. 
+Learn more about our licenses [here](https://www.elastic.co/subscriptions). + +Users that want to use the open source version of Elasticsearch do not have access to the basic data types. +However, some of these types have an OSS replacement that can be used instead, without too much loss of functionality. + +This flag performs a best-effort fallback, replacing basic data types with their OSS replacement. + +Indices using purely OSS types will benefit from the normalization of ECS, but may be missing out on some of the added functionality of these basic types. + +Current fallbacks applied by this flag are: + +- `wildcard` => `keyword` +- `version` => `keyword` + +Usage: + +``` +$ python scripts/generator.py --oss +``` + #### Strict Mode The `--strict` argument enables "strict mode". Strict mode performs a stricter validation step against the schema's contents. @@ -302,7 +327,7 @@ The `--strict` argument enables "strict mode". Strict mode performs a stricter v Basic usage: ``` -$ python/generator.py --strict +$ python scripts/generator.py --strict ``` Strict mode requires the following conditions, else the script exits on an exception: diff --git a/scripts/generator.py b/scripts/generator.py index 733f4155fe..b6dcf05db9 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -12,6 +12,7 @@ from generators import intermediate_files from schema import loader +from schema import oss from schema import cleaner from schema import finalizer from schema import subset_filter @@ -41,6 +42,8 @@ def main(): # ecs_helpers.yaml_dump('ecs.yml', fields) fields = loader.load_schemas(ref=args.ref, included_files=args.include) + if args.oss: + oss.fallback(fields) cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) fields = subset_filter.filter(fields, args.subset, out_dir) @@ -60,20 +63,21 @@ def main(): def argument_parser(): parser = argparse.ArgumentParser() - parser.add_argument('--intermediate-only', action='store_true', - help='generate intermediary files
only') + parser.add_argument('--ref', action='store', help='git reference to use when building schemas') parser.add_argument('--include', nargs='+', help='include user specified directory of custom field definitions') parser.add_argument('--subset', nargs='+', help='render a subset of the schema') - parser.add_argument('--out', action='store', help='directory to store the generated files') - parser.add_argument('--ref', action='store', help='git reference to use when building schemas') + parser.add_argument('--out', action='store', help='directory to output the generated files') parser.add_argument('--template-settings', action='store', help='index template settings to use when generating elasticsearch template') parser.add_argument('--mapping-settings', action='store', help='mapping settings to use when generating elasticsearch template') + parser.add_argument('--oss', action='store_true', help='replace basic data types with oss ones where possible') parser.add_argument('--strict', action='store_true', - help='enforce stricter checking at schema cleanup') + help='enforce strict checking at schema cleanup') + parser.add_argument('--intermediate-only', action='store_true', + help='generate intermediary files only') args = parser.parse_args() # Clean up empty include of the Makefile if args.include and [''] == args.include: diff --git a/scripts/schema/oss.py b/scripts/schema/oss.py new file mode 100644 index 0000000000..ba38a254b1 --- /dev/null +++ b/scripts/schema/oss.py @@ -0,0 +1,29 @@ +# This script performs a best effort fallback of basic data types to equivalent +# OSS data types. +# Note however that not all basic data types have an OSS replacement. +# +# The way this script is currently written, it has to be run on the fields *before* +# the cleaner script applies defaults, as there's no concept of defaults here. +# But since it navigates using the visitor script, it can easily be moved around +# in the chain, provided we add support for defaults as well. 
+# +# For now, no warning is output on basic fields that don't have a fallback. +# This could be improved if ECS starts using such types. + +from schema import visitor + +TYPE_FALLBACKS = { + 'wildcard': 'keyword', + 'version': 'keyword' +} + + +def fallback(fields): + """Verify all fields for basic data type usage, and fallback to an OSS equivalent if appropriate.""" + visitor.visit_fields(fields, field_func=perform_fallback) + + +def perform_fallback(field): + """Performs a best effort fallback of basic data types to equivalent OSS data types.""" + if field['field_details']['type'] in TYPE_FALLBACKS.keys(): + field['field_details']['type'] = TYPE_FALLBACKS[field['field_details']['type']] diff --git a/scripts/tests/unit/test_schema_oss.py b/scripts/tests/unit/test_schema_oss.py new file mode 100644 index 0000000000..4ac08d9d08 --- /dev/null +++ b/scripts/tests/unit/test_schema_oss.py @@ -0,0 +1,35 @@ +import os +import pprint +import sys +import unittest + +sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) + +from schema import oss +from schema import visitor + + +class TestSchemaOss(unittest.TestCase): + + def setUp(self): + self.maxDiff = None + + def test_wildcard_fallback(self): + field = {'field_details': {'name': 'myfield', 'type': 'wildcard'}} + oss.perform_fallback(field) + self.assertEqual('keyword', field['field_details']['type']) + + def test_version_fallback(self): + field = {'field_details': {'name': 'myfield', 'type': 'version'}} + oss.perform_fallback(field) + self.assertEqual('keyword', field['field_details']['type']) + + def test_basic_without_fallback(self): + field = {'field_details': {'name': 'myfield', 'type': 'histogram'}} + oss.perform_fallback(field) + self.assertEqual('histogram', field['field_details']['type']) + + def test_oss_no_fallback(self): + field = {'field_details': {'name': 'myfield', 'type': 'keyword'}} + oss.perform_fallback(field) + self.assertEqual('keyword', field['field_details']['type'])