From 4ab85fa766a76ba37a685b02bd14eb79dde73744 Mon Sep 17 00:00:00 2001 From: Eric Beahan Date: Wed, 13 Jan 2021 16:33:14 -0600 Subject: [PATCH] [1.x] Stage 2 changes for RFC 0009 - data_stream fields (#1215) (#1222) --- experimental/generated/beats/fields.ecs.yml | 52 ++++++++++++++ experimental/generated/csv/fields.csv | 3 + experimental/generated/ecs/ecs_flat.yml | 46 +++++++++++++ experimental/generated/ecs/ecs_nested.yml | 69 +++++++++++++++++++ .../generated/elasticsearch/7/template.json | 13 ++++ .../elasticsearch/component/data_stream.json | 25 +++++++ .../generated/elasticsearch/template.json | 3 +- experimental/schemas/data_stream.yml | 60 ++++++++++++++++ 8 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 experimental/generated/elasticsearch/component/data_stream.json create mode 100644 experimental/schemas/data_stream.yml diff --git a/experimental/generated/beats/fields.ecs.yml b/experimental/generated/beats/fields.ecs.yml index 02da5c2ee4..d19d6a36d8 100644 --- a/experimental/generated/beats/fields.ecs.yml +++ b/experimental/generated/beats/fields.ecs.yml @@ -564,6 +564,58 @@ ignore_above: 1024 description: Runtime managing this container. example: docker + - name: data_stream + title: Data Stream + group: 2 + description: 'The data_stream fields take part in defining the new data stream + naming scheme. + + In the new data stream naming scheme the value of the data stream fields combine + to the name of the actual data stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`. + This means the fields can only contain characters that are valid as part of + names of data streams. More details about this can be found in this https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme[blog + post]. + + An Elasticsearch data stream consists of one or more backing indices, and a + data stream name forms part of the backing indices names. Due to this convention, + data streams must also follow index naming restrictions. For example, data stream + names cannot include \, /, *, ?, ", <, >, |, ` `. Please see the Elasticsearch + reference for additional https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params[restrictions].' + type: group + fields: + - name: dataset + level: extended + type: constant_keyword + description: "The field can contain anything that makes sense to signify the\ + \ source of the data.\nExamples include `nginx.access`, `prometheus`, `endpoint`\ + \ etc. For data streams that otherwise fit, but that do not have dataset set\ + \ we use the value \"generic\" for the dataset value. `event.dataset` should\ + \ have the same value as `data_stream.dataset`.\nBeyond the Elasticsearch\ + \ data stream naming criteria noted above, the `dataset` value has additional\ + \ restrictions:\n * Must not contain `-`\n * No longer than 100 characters" + example: nginx.access + default_field: false + - name: namespace + level: extended + type: constant_keyword + description: "A user defined namespace. Namespaces are useful to allow grouping\ + \ of data.\nMany users already organize their indices this way, and the data\ + \ stream naming scheme now provides this best practice as a default. Many\ + \ users will populate this field with `default`. If no value is used, it falls\ + \ back to `default`.\nBeyond the Elasticsearch index naming criteria noted\ + \ above, `namespace` value has the additional restrictions:\n * Must not\ + \ contain `-`\n * No longer than 100 characters" + example: production + default_field: false + - name: type + level: extended + type: constant_keyword + description: 'An overarching type for the data stream. + + Currently allowed values are "logs" and "metrics". We expect to also add "traces" + and "synthetics" in the near future.' + example: logs + default_field: false - name: destination title: Destination group: 2 diff --git a/experimental/generated/csv/fields.csv b/experimental/generated/csv/fields.csv index b5efd516c7..95199f66a2 100644 --- a/experimental/generated/csv/fields.csv +++ b/experimental/generated/csv/fields.csv @@ -60,6 +60,9 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Normalization,Example,Description 1.9.0-dev+exp,true,container,container.labels,object,extended,,,Image labels. 1.9.0-dev+exp,true,container,container.name,keyword,extended,,,Container name. 1.9.0-dev+exp,true,container,container.runtime,keyword,extended,,docker,Runtime managing this container. +1.9.0-dev+exp,true,data_stream,data_stream.dataset,constant_keyword,extended,,nginx.access,The field can contain anything that makes sense to signify the source of the data. +1.9.0-dev+exp,true,data_stream,data_stream.namespace,constant_keyword,extended,,production,A user defined namespace. Namespaces are useful to allow grouping of data. +1.9.0-dev+exp,true,data_stream,data_stream.type,constant_keyword,extended,,logs,An overarching type for the data stream. 1.9.0-dev+exp,true,destination,destination.address,keyword,extended,,,Destination network address. 1.9.0-dev+exp,true,destination,destination.as.number,long,extended,,15169,Unique number allocated to the autonomous system. 1.9.0-dev+exp,true,destination,destination.as.organization.name,wildcard,extended,,Google LLC,Organization name. diff --git a/experimental/generated/ecs/ecs_flat.yml b/experimental/generated/ecs/ecs_flat.yml index f98d8b95ce..a7c053c2d1 100644 --- a/experimental/generated/ecs/ecs_flat.yml +++ b/experimental/generated/ecs/ecs_flat.yml @@ -705,6 +705,52 @@ container.runtime: normalize: [] short: Runtime managing this container. type: keyword +data_stream.dataset: + dashed_name: data-stream-dataset + description: "The field can contain anything that makes sense to signify the source\ + \ of the data.\nExamples include `nginx.access`, `prometheus`, `endpoint` etc.\ + \ For data streams that otherwise fit, but that do not have dataset set we use\ + \ the value \"generic\" for the dataset value. `event.dataset` should have the\ + \ same value as `data_stream.dataset`.\nBeyond the Elasticsearch data stream naming\ + \ criteria noted above, the `dataset` value has additional restrictions:\n *\ + \ Must not contain `-`\n * No longer than 100 characters" + example: nginx.access + flat_name: data_stream.dataset + level: extended + name: dataset + normalize: [] + short: The field can contain anything that makes sense to signify the source of + the data. + type: constant_keyword +data_stream.namespace: + dashed_name: data-stream-namespace + description: "A user defined namespace. Namespaces are useful to allow grouping\ + \ of data.\nMany users already organize their indices this way, and the data stream\ + \ naming scheme now provides this best practice as a default. Many users will\ + \ populate this field with `default`. If no value is used, it falls back to `default`.\n\ + Beyond the Elasticsearch index naming criteria noted above, `namespace` value\ + \ has the additional restrictions:\n * Must not contain `-`\n * No longer than\ + \ 100 characters" + example: production + flat_name: data_stream.namespace + level: extended + name: namespace + normalize: [] + short: A user defined namespace. Namespaces are useful to allow grouping of data. + type: constant_keyword +data_stream.type: + dashed_name: data-stream-type + description: 'An overarching type for the data stream. + + Currently allowed values are "logs" and "metrics". We expect to also add "traces" + and "synthetics" in the near future.' + example: logs + flat_name: data_stream.type + level: extended + name: type + normalize: [] + short: An overarching type for the data stream. + type: constant_keyword destination.address: dashed_name: destination-address description: 'Some event destination addresses are defined ambiguously. The event diff --git a/experimental/generated/ecs/ecs_nested.yml b/experimental/generated/ecs/ecs_nested.yml index 97acbc2459..2b825db77d 100644 --- a/experimental/generated/ecs/ecs_nested.yml +++ b/experimental/generated/ecs/ecs_nested.yml @@ -983,6 +983,75 @@ container: short: Fields describing the container that generated this event. title: Container type: group +data_stream: + description: 'The data_stream fields take part in defining the new data stream naming + scheme. + + In the new data stream naming scheme the value of the data stream fields combine + to the name of the actual data stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`. + This means the fields can only contain characters that are valid as part of names + of data streams. More details about this can be found in this https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme[blog + post]. + + An Elasticsearch data stream consists of one or more backing indices, and a data + stream name forms part of the backing indices names. Due to this convention, data + streams must also follow index naming restrictions. For example, data stream names + cannot include \, /, *, ?, ", <, >, |, ` `. Please see the Elasticsearch reference + for additional https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params[restrictions].' + fields: + data_stream.dataset: + dashed_name: data-stream-dataset + description: "The field can contain anything that makes sense to signify the\ + \ source of the data.\nExamples include `nginx.access`, `prometheus`, `endpoint`\ + \ etc. For data streams that otherwise fit, but that do not have dataset set\ + \ we use the value \"generic\" for the dataset value. `event.dataset` should\ + \ have the same value as `data_stream.dataset`.\nBeyond the Elasticsearch\ + \ data stream naming criteria noted above, the `dataset` value has additional\ + \ restrictions:\n * Must not contain `-`\n * No longer than 100 characters" + example: nginx.access + flat_name: data_stream.dataset + level: extended + name: dataset + normalize: [] + short: The field can contain anything that makes sense to signify the source + of the data. + type: constant_keyword + data_stream.namespace: + dashed_name: data-stream-namespace + description: "A user defined namespace. Namespaces are useful to allow grouping\ + \ of data.\nMany users already organize their indices this way, and the data\ + \ stream naming scheme now provides this best practice as a default. Many\ + \ users will populate this field with `default`. If no value is used, it falls\ + \ back to `default`.\nBeyond the Elasticsearch index naming criteria noted\ + \ above, `namespace` value has the additional restrictions:\n * Must not\ + \ contain `-`\n * No longer than 100 characters" + example: production + flat_name: data_stream.namespace + level: extended + name: namespace + normalize: [] + short: A user defined namespace. Namespaces are useful to allow grouping of + data. + type: constant_keyword + data_stream.type: + dashed_name: data-stream-type + description: 'An overarching type for the data stream. + + Currently allowed values are "logs" and "metrics". We expect to also add "traces" + and "synthetics" in the near future.' + example: logs + flat_name: data_stream.type + level: extended + name: type + normalize: [] + short: An overarching type for the data stream. + type: constant_keyword + group: 2 + name: data_stream + prefix: data_stream. + short: The data_stream fields take part in defining the new data stream naming scheme. + title: Data Stream + type: group destination: description: 'Destination fields capture details about the receiver of a network exchange/packet. These fields are populated from a network event, packet, or other diff --git a/experimental/generated/elasticsearch/7/template.json b/experimental/generated/elasticsearch/7/template.json index 029aa451f3..7420e1c441 100644 --- a/experimental/generated/elasticsearch/7/template.json +++ b/experimental/generated/elasticsearch/7/template.json @@ -303,6 +303,19 @@ } } }, + "data_stream": { + "properties": { + "dataset": { + "type": "constant_keyword" + }, + "namespace": { + "type": "constant_keyword" + }, + "type": { + "type": "constant_keyword" + } + } + }, "destination": { "properties": { "address": { diff --git a/experimental/generated/elasticsearch/component/data_stream.json b/experimental/generated/elasticsearch/component/data_stream.json new file mode 100644 index 0000000000..3d4d93c586 --- /dev/null +++ b/experimental/generated/elasticsearch/component/data_stream.json @@ -0,0 +1,25 @@ +{ + "_meta": { + "documentation": "https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html", + "ecs_version": "1.9.0-dev+exp" + }, + "template": { + "mappings": { + "properties": { + "data_stream": { + "properties": { + "dataset": { + "type": "constant_keyword" + }, + "namespace": { + "type": "constant_keyword" + }, + "type": { + "type": "constant_keyword" + } + } + } + } + } + } +} \ No newline at end of file diff --git a/experimental/generated/elasticsearch/template.json b/experimental/generated/elasticsearch/template.json index b8f252c020..f81f6b49dc 100644 --- a/experimental/generated/elasticsearch/template.json +++ b/experimental/generated/elasticsearch/template.json @@ -37,7 +37,8 @@ "ecs_1.9.0-dev-exp_url", "ecs_1.9.0-dev-exp_user", "ecs_1.9.0-dev-exp_user_agent", - "ecs_1.9.0-dev-exp_vulnerability" + "ecs_1.9.0-dev-exp_vulnerability", + "ecs_1.9.0-dev-exp_data_stream" ], "index_patterns": [ "try-ecs-*" diff --git a/experimental/schemas/data_stream.yml b/experimental/schemas/data_stream.yml new file mode 100644 index 0000000000..d651800fa4 --- /dev/null +++ b/experimental/schemas/data_stream.yml @@ -0,0 +1,60 @@ +--- +- name: data_stream + title: Data Stream + short: The data_stream fields take part in defining the new data stream naming scheme. + description: > + The data_stream fields take part in defining the new data stream naming scheme. + + In the new data stream naming scheme the value of the data stream fields combine to the name of the actual data + stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`. This means the fields + can only contain characters that are valid as part of names of data streams. More details about this can be found in + this https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme[blog post]. + + An Elasticsearch data stream consists of one or more backing indices, and a data stream name forms part of the backing indices names. + Due to this convention, data streams must also follow index naming restrictions. For example, data stream names cannot include \, /, *, ?, ", <, >, |, ` `. + Please see the Elasticsearch reference for additional https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params[restrictions]. + fields: + + - name: type + level: extended + type: constant_keyword + example: logs + # Any future values for `data_stream.type` should also adhere to the following restrictions (these are derived from the Elasticsearch index restrictions): + # * Must not contain `-` + # * Must not start with `+` or `_` + description: > + An overarching type for the data stream. + + Currently allowed values are "logs" and "metrics". We expect to also add "traces" and "synthetics" in the near future. + short: An overarching type for the data stream. + + - name: dataset + level: extended + type: constant_keyword + example: nginx.access + description: > + The field can contain anything that makes sense to signify the source of the data. + + Examples include `nginx.access`, `prometheus`, `endpoint` etc. For data streams that otherwise fit, but that + do not have dataset set we use the value "generic" for the dataset value. `event.dataset` should have the + same value as `data_stream.dataset`. + + Beyond the Elasticsearch data stream naming criteria noted above, the `dataset` value has additional restrictions: + * Must not contain `-` + * No longer than 100 characters + short: The field can contain anything that makes sense to signify the source of the data. + + - name: namespace + level: extended + type: constant_keyword + example: production + description: > + A user defined namespace. Namespaces are useful to allow grouping of data. + + Many users already organize their indices this way, and the data stream naming scheme now provides this + best practice as a default. Many users will populate this field with `default`. If no value is used, it falls back to `default`. + + Beyond the Elasticsearch index naming criteria noted above, `namespace` value has the additional restrictions: + * Must not contain `-` + * No longer than 100 characters + short: A user defined namespace. Namespaces are useful to allow grouping of data.