From a648d11a8afd39d4390411dc9ba0a774194ca1f6 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Mon, 8 Nov 2021 11:36:48 +0100 Subject: [PATCH 1/7] add documentation for add_kubernetes_metadata matcher; support 'var/log/pods' for resource_type: pod Signed-off-by: Tetiana Kravchenko --- .../add_kubernetes_metadata/matchers.go | 101 +++++++++++------- .../add_kubernetes_metadata/matchers_test.go | 24 +++++ .../docs/indexers_and_matchers.asciidoc | 27 ++++- 3 files changed, 111 insertions(+), 41 deletions(-) diff --git a/filebeat/processor/add_kubernetes_metadata/matchers.go b/filebeat/processor/add_kubernetes_metadata/matchers.go index 55336c3a315b..93174a407137 100644 --- a/filebeat/processor/add_kubernetes_metadata/matchers.go +++ b/filebeat/processor/add_kubernetes_metadata/matchers.go @@ -78,59 +78,79 @@ func newLogsPathMatcher(cfg common.Config) (add_kubernetes_metadata.Matcher, err // Docker container ID is a 64-character-long hexadecimal string const containerIdLen = 64 -// Pod UID is on the 5th index of the path directories -const podUIDPos = 5 - func (f *LogPathMatcher) MetadataIndex(event common.MapStr) string { value, err := event.GetValue("log.file.path") - if err == nil { - source := value.(string) - f.logger.Debugf("Incoming log.file.path value: %s", source) + if err != nil { + f.logger.Errorf("Error extracting log.file.path from the event") + return "" + } - if !strings.Contains(source, f.LogsPath) { - f.logger.Errorf("Error extracting container id - source value does not contain matcher's logs_path '%s'.", f.LogsPath) - return "" - } + source := value.(string) + f.logger.Debugf("Incoming log.file.path value: %s", source) + + if !strings.Contains(source, f.LogsPath) { + f.logger.Errorf("Error extracting container id - source value does not contain matcher's logs_path '%s'.", f.LogsPath) + return "" + } - sourceLen := len(source) - logsPathLen := len(f.LogsPath) + sourceLen := len(source) + logsPathLen := len(f.LogsPath) - if f.ResourceType == 
"pod" { - // Specify a pod resource type when manually mounting log volumes and they end up under "/var/lib/kubelet/pods/" - // This will extract only the pod UID, which offers less granularity of metadata when compared to the container ID - if strings.HasPrefix(f.LogsPath, podLogsPath()) && strings.HasSuffix(source, ".log") { + if f.ResourceType == "pod" { + // Pod resource type will extract only the pod UID, which offers less granularity of metadata when compared to the container ID + if strings.HasSuffix(source, ".log") { + // Specify a pod resource type when writing logs into manually mounted log volume, + // those logs appear under "/var/lib/kubelet/pods/<pod UID>/volumes/..." + if strings.HasPrefix(f.LogsPath, podKubeletLogsPath()) { pathDirs := strings.Split(source, pathSeparator) + podUIDPos := 5 if len(pathDirs) > podUIDPos { podUID := strings.Split(source, pathSeparator)[podUIDPos] - f.logger.Debugf("Using pod uid: %s", podUID) return podUID } - - f.logger.Error("Error extracting pod uid - source value contains matcher's logs_path, however it is too short to contain a Pod UID.") - } - } else { - // In case of the Kubernetes log path "/var/log/containers/", - // the container ID will be located right before the ".log" extension. - if strings.HasPrefix(f.LogsPath, containerLogsPath()) && strings.HasSuffix(source, ".log") && sourceLen >= containerIdLen+4 { - containerIDEnd := sourceLen - 4 - cid := source[containerIDEnd-containerIdLen : containerIDEnd] - f.logger.Debugf("Using container id: %s", cid) - return cid } - - // In any other case, we assume the container ID will follow right after the log path. - // However we need to check the length to prevent "slice bound out of range" runtime errors. 
- if sourceLen >= logsPathLen+containerIdLen { - cid := source[logsPathLen : logsPathLen+containerIdLen] - f.logger.Debugf("Using container id: %s", cid) - return cid + // In case of the Kubernetes log path "/var/log/pods/", + // the pod ID will be extracted from the directory name, + // file name example: "/var/log/pods/'<namespace>_<pod_name>_<pod_uid>'/container_name/0.log". + if strings.HasPrefix(f.LogsPath, podLogsPath()) { + pathDirs := strings.Split(source, pathSeparator) + podUIDPos := 4 + if len(pathDirs) > podUIDPos { + podUID := strings.Split(pathDirs[podUIDPos], "_") + if len(podUID) > 2 { + f.logger.Debugf("Using pod uid: %s", podUID) + return podUID[2] + } + } } - f.logger.Error("Error extracting container id - source value contains matcher's logs_path, however it is too short to contain a Docker container ID.") + f.logger.Error(`Error extracting pod uid - source value does not contains matcher's logs_path, + supported log_path for 'pod' resource_type: '/var/lib/kubelet/pods/', '/var/log/pods/'.`) + return "" } } + // In case of the Kubernetes log path "/var/log/containers/", + // the container ID will be located right before the ".log" extension. + // file name example: /var/log/containers/<pod_name>_<namespace>_<container_name>-<container_id>.log + if strings.HasPrefix(f.LogsPath, containerLogsPath()) && strings.HasSuffix(source, ".log") && sourceLen >= containerIdLen+4 { + containerIDEnd := sourceLen - 4 + cid := source[containerIDEnd-containerIdLen : containerIDEnd] + f.logger.Debugf("Using container id: %s", cid) + return cid + } + // In any other case, we assume the container ID will follow right after the log path. + // However we need to check the length to prevent "slice bound out of range" runtime errors. + // for the default log path /var/lib/docker/containers/ container ID will follow right after the log path. 
+ // file name example: /var/lib/docker/containers/<container_id>/<container_id>-json.log + if sourceLen >= logsPathLen+containerIdLen { + cid := source[logsPathLen : logsPathLen+containerIdLen] + f.logger.Debugf("Using container id: %s", cid) + return cid + } + + f.logger.Error("Error extracting container id - source value contains matcher's logs_path, however it is too short to contain a Docker container ID.") return "" } @@ -141,13 +161,20 @@ func defaultLogPath() string { return "/var/lib/docker/containers/" } -func podLogsPath() string { +func podKubeletLogsPath() string { if runtime.GOOS == "windows" { return "C:\\var\\lib\\kubelet\\pods\\" } return "/var/lib/kubelet/pods/" } +func podLogsPath() string { + if runtime.GOOS == "windows" { + return "C:\\var\\log\\pods\\" + } + return "/var/log/pods/" +} + func containerLogsPath() string { if runtime.GOOS == "windows" { return "C:\\var\\log\\containers\\" diff --git a/filebeat/processor/add_kubernetes_metadata/matchers_test.go b/filebeat/processor/add_kubernetes_metadata/matchers_test.go index 5fc76514377b..7bbaf589a5cd 100644 --- a/filebeat/processor/add_kubernetes_metadata/matchers_test.go +++ b/filebeat/processor/add_kubernetes_metadata/matchers_test.go @@ -117,6 +117,30 @@ func TestLogsPathMatcher_InvalidSource4(t *testing.T) { executeTestWithResourceType(t, cfgLogsPath, cfgResourceType, source, expectedResult) } +func TestLogsPathMatcher_InvalidVarLogPodSource(t *testing.T) { + cfgLogsPath := "/var/log/pods/" + cfgResourceType := "pod" + source := fmt.Sprintf("/invalid/dir/namespace_pod-name_%s/container/0.log", puid) + expectedResult := "" + executeTestWithResourceType(t, cfgLogsPath, cfgResourceType, source, expectedResult) +} + +func TestLogsPathMatcher_InvalidVarLogPodIDFormat(t *testing.T) { + cfgLogsPath := "/var/log/pods/" + cfgResourceType := "pod" + source := fmt.Sprintf("/var/log/pods/%s/container/0.log", puid) + expectedResult := "" + executeTestWithResourceType(t, cfgLogsPath, cfgResourceType, source, expectedResult) +} + 
+func TestLogsPathMatcher_ValidVarLogPod(t *testing.T) { + cfgLogsPath := "/var/log/pods/" + cfgResourceType := "pod" + source := fmt.Sprintf("/var/log/pods/namespace_pod-name_%s/container/0.log", puid) + expectedResult := puid + executeTestWithResourceType(t, cfgLogsPath, cfgResourceType, source, expectedResult) +} + func executeTest(t *testing.T, cfgLogsPath string, source string, expectedResult string) { executeTestWithResourceType(t, cfgLogsPath, "", source, expectedResult) } diff --git a/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc b/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc index 4c7fdba7503e..035977bb9431 100644 --- a/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc +++ b/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc @@ -83,10 +83,29 @@ the `log.file.path` field. This matcher has the following configuration settings: `logs_path`:: (Optional) Base path of container logs. If not specified, it uses -the default logs path of the platform where {beatname_uc} is running. -`resource_type`:: (Optional) Type of the resource to obtain the ID of. It can be -`pod`, to make the lookup based on the pod UID, or `container`, to make the -lookup based on the container ID. It defaults to `container`. +the default logs path of the platform where {beatname_uc} is running: for Linux - +`/var/lib/docker/containers/`, Windowd - `C:\\ProgramData\\Docker\\containers`. +To change the default value: container ID must follow right after the `logs_path` - +`/`, where `container_id` is a 64-character-long +hexadecimal string. + +`resource_type`:: (Optional) Type of the resource to obtain the ID of. +Valid `resource_type`: +* `pod`: to make the lookup based on the pod UID. 
When `resource_type` is set to +`pod`, `logs_path` must be set as well, supported path in this case: +** `/var/lib/kubelet/pods/` used to read logs from mounted into the pod volumes, +those logs end up under `/var/lib/kubelet/pods//volumes//...` +To use `/var/lib/kubelet/pods/` as a `log_path`, `/var/lib/kubelet/pods` must be +mounted into the filebear pods. +** `/var/log/pods/` +Note: when using `resource_type: 'pod'` logs will be enriched only with pod +metadata: pod id, pod name, etc., not container metadata. +* `container`: to make the lookup based on the container ID, `logs_path` must +be set to `/var/log/containers/`. +It defaults to `container`. + +To be able to use `logs_path` matcher filebeat input path must be a subdirectory +of directory defined in `logs_path` configuration setting. The default configuration is able to lookup the metadata using the container ID when the logs are collected from the default docker logs path From 516cb13545ed835a203457c24b64aef54efe8c86 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Mon, 8 Nov 2021 15:08:26 +0100 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Chris Mark --- .../docs/indexers_and_matchers.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc b/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc index 035977bb9431..71af7e00dc6b 100644 --- a/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc +++ b/libbeat/processors/add_kubernetes_metadata/docs/indexers_and_matchers.asciidoc @@ -84,7 +84,7 @@ This matcher has the following configuration settings: `logs_path`:: (Optional) Base path of container logs. If not specified, it uses the default logs path of the platform where {beatname_uc} is running: for Linux - -`/var/lib/docker/containers/`, Windowd - `C:\\ProgramData\\Docker\\containers`. 
+`/var/lib/docker/containers/`, Windows - `C:\\ProgramData\\Docker\\containers`. To change the default value: container ID must follow right after the `logs_path` - `/`, where `container_id` is a 64-character-long hexadecimal string. @@ -96,7 +96,7 @@ Valid `resource_type`: ** `/var/lib/kubelet/pods/` used to read logs from mounted into the pod volumes, those logs end up under `/var/lib/kubelet/pods//volumes//...` To use `/var/lib/kubelet/pods/` as a `log_path`, `/var/lib/kubelet/pods` must be -mounted into the filebear pods. +mounted into the filebeat Pods. ** `/var/log/pods/` Note: when using `resource_type: 'pod'` logs will be enriched only with pod metadata: pod id, pod name, etc., not container metadata. From dc3553b2f2a4f2e7541442f170d5dcd644d00b28 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Mon, 8 Nov 2021 16:03:38 +0100 Subject: [PATCH 3/7] add record to CHANGELOG.next.asciidoc Signed-off-by: Tetiana Kravchenko --- CHANGELOG.next.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 3c32397c6b7e..5e9e773c9a8a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -319,6 +319,8 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Update `aws-s3` input to connect to non AWS S3 buckets {issue}28222[28222] {pull}28234[28234] - Sophos UTM: Support logs containing hostname in syslog header. {pull}28638[28638] - Moving Oracle Filebeat module to GA. {pull}28754[28754] +- Add support for '/var/log/pods/' path for add_kubernetes_metadata processor with `resource_type: pod`. {pull}28868[28868] +- Add documentation for add_kubernetes_metadata processors `log_path` matcher. 
{pull}28868[28868] *Heartbeat* From e8ab1fed7e7d474f0601a3145f5799128a581f76 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Tue, 9 Nov 2021 13:11:29 +0100 Subject: [PATCH 4/7] address comments: log pod id instead of array; log event Signed-off-by: Tetiana Kravchenko --- filebeat/processor/add_kubernetes_metadata/matchers.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filebeat/processor/add_kubernetes_metadata/matchers.go b/filebeat/processor/add_kubernetes_metadata/matchers.go index 93174a407137..35866e45166d 100644 --- a/filebeat/processor/add_kubernetes_metadata/matchers.go +++ b/filebeat/processor/add_kubernetes_metadata/matchers.go @@ -81,7 +81,7 @@ const containerIdLen = 64 func (f *LogPathMatcher) MetadataIndex(event common.MapStr) string { value, err := event.GetValue("log.file.path") if err != nil { - f.logger.Errorf("Error extracting log.file.path from the event") + f.logger.Debugf("Error extracting log.file.path from the event: %s.", event) return "" } @@ -119,7 +119,7 @@ func (f *LogPathMatcher) MetadataIndex(event common.MapStr) string { if len(pathDirs) > podUIDPos { podUID := strings.Split(pathDirs[podUIDPos], "_") if len(podUID) > 2 { - f.logger.Debugf("Using pod uid: %s", podUID) + f.logger.Debugf("Using pod uid: %s", podUID[2]) return podUID[2] } } From ab3c82161a433583dabb68e3499aa83e22990e21 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Tue, 9 Nov 2021 13:12:58 +0100 Subject: [PATCH 5/7] add validation for logs_path matchers config Signed-off-by: Tetiana Kravchenko --- .../add_kubernetes_metadata/matchers.go | 3 +- .../add_kubernetes_metadata/config.go | 29 ++++++++ .../add_kubernetes_metadata/config_test.go | 73 +++++++++++++++++++ 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/filebeat/processor/add_kubernetes_metadata/matchers.go b/filebeat/processor/add_kubernetes_metadata/matchers.go index 35866e45166d..7e7996fede72 100644 --- 
a/filebeat/processor/add_kubernetes_metadata/matchers.go +++ b/filebeat/processor/add_kubernetes_metadata/matchers.go @@ -125,8 +125,7 @@ func (f *LogPathMatcher) MetadataIndex(event common.MapStr) string { } } - f.logger.Error(`Error extracting pod uid - source value does not contains matcher's logs_path, - supported log_path for 'pod' resource_type: '/var/lib/kubelet/pods/', '/var/log/pods/'.`) + f.logger.Error("Error extracting pod uid - source value does not contains matcher's logs_path") return "" } } diff --git a/libbeat/processors/add_kubernetes_metadata/config.go b/libbeat/processors/add_kubernetes_metadata/config.go index 6e7384030871..41bf543b1f0e 100644 --- a/libbeat/processors/add_kubernetes_metadata/config.go +++ b/libbeat/processors/add_kubernetes_metadata/config.go @@ -69,5 +69,34 @@ func (k *kubeAnnotatorConfig) Validate() error { k.Host = "" } + for _, matcher := range k.Matchers { + if matcherCfg, ok := matcher["logs_path"]; ok { + if matcherCfg.HasField("resource_type") { + logsPathMatcher := struct { + LogsPath string `config:"logs_path"` + ResourceType string `config:"resource_type"` + }{} + + err := matcherCfg.Unpack(&logsPathMatcher) + if err != nil { + return fmt.Errorf("fail to unpack the `logs_path` matcher configuration: %s", err) + } + if logsPathMatcher.LogsPath == "" { + return fmt.Errorf("invalid logs_path matcher configuration: when resource_type is defined, logs_path must be set as well") + } + if logsPathMatcher.ResourceType != "pod" && logsPathMatcher.ResourceType != "container" { + return fmt.Errorf("invalid resource_type %s, valid values include `pod`, `container`", logsPathMatcher.ResourceType) + } + if logsPathMatcher.ResourceType == "pod" && !(logsPathMatcher.LogsPath == "/var/lib/kubelet/pods/" || logsPathMatcher.LogsPath == "/var/log/pods/") { + return fmt.Errorf("invalid logs_path defined for resource_type: %s, valid values include `/var/lib/kubelet/pods/`, `/var/log/pods/`", logsPathMatcher.ResourceType) + } + if 
logsPathMatcher.ResourceType == "container" && logsPathMatcher.LogsPath != "/var/log/containers/" { + return fmt.Errorf("invalid logs_path defined for resource_type: %s, valid value is `/var/log/containers/`", logsPathMatcher.ResourceType) + } + } + + } + } + return nil } diff --git a/libbeat/processors/add_kubernetes_metadata/config_test.go b/libbeat/processors/add_kubernetes_metadata/config_test.go index 3bdcf34a1d7b..f7cc83812f66 100644 --- a/libbeat/processors/add_kubernetes_metadata/config_test.go +++ b/libbeat/processors/add_kubernetes_metadata/config_test.go @@ -60,3 +60,76 @@ func TestConfigValidate(t *testing.T) { } } } + +func TestConfigValidate_LogsPatchMatcher(t *testing.T) { + tests := []struct { + matcherName string + matcherConfig map[string]interface{} + error bool + }{ + { + matcherName: "", + matcherConfig: map[string]interface{}{}, + error: false, + }, + { + matcherName: "logs_path", + matcherConfig: map[string]interface{}{ + "resource_type": "pod", + }, + error: true, + }, + { + matcherName: "logs_path", + matcherConfig: map[string]interface{}{ + "resource_type": "pod", + "invalid_field": "invalid_value", + }, + error: true, + }, + { + matcherName: "logs_path", + matcherConfig: map[string]interface{}{ + "resource_type": "pod", + "logs_path": "/var/log/invalid/path/", + }, + error: true, + }, + { + matcherName: "logs_path", + matcherConfig: map[string]interface{}{ + "resource_type": "pod", + "logs_path": "/var/log/pods/", + }, + error: false, + }, + { + matcherName: "logs_path", + matcherConfig: map[string]interface{}{ + "resource_type": "container", + "logs_path": "/var/log/containers/", + }, + error: false, + }, + } + + for _, test := range tests { + cfg, _ := common.NewConfigFrom(test.matcherConfig) + + c := defaultKubernetesAnnotatorConfig() + c.DefaultMatchers = Enabled{false} + + err := cfg.Unpack(&c) + c.Matchers = PluginConfig{ + { + test.matcherName: *cfg, + }, + } + err = c.Validate() + if test.error { + require.NotNil(t, err) + } else 
{ + require.Nil(t, err) + } + } +} From 2dfe013789a4ee5f396881ae83808cd6b76a90db Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Tue, 9 Nov 2021 13:51:55 +0100 Subject: [PATCH 6/7] add comment for the config validation check Signed-off-by: Tetiana Kravchenko --- libbeat/processors/add_kubernetes_metadata/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libbeat/processors/add_kubernetes_metadata/config.go b/libbeat/processors/add_kubernetes_metadata/config.go index 41bf543b1f0e..ecc9b5919feb 100644 --- a/libbeat/processors/add_kubernetes_metadata/config.go +++ b/libbeat/processors/add_kubernetes_metadata/config.go @@ -69,6 +69,9 @@ func (k *kubeAnnotatorConfig) Validate() error { k.Host = "" } + // Checks below were added to warn the users early on and avoid initialising the processor in case the `logs_path` + // matcher config is not valid: supported paths defined as a `logs_path` configuration setting are strictly defined + // if `resource_type` is set for _, matcher := range k.Matchers { if matcherCfg, ok := matcher["logs_path"]; ok { if matcherCfg.HasField("resource_type") { From de580764dedd1a58e073b90c1037f0ff7ea04a72 Mon Sep 17 00:00:00 2001 From: Tetiana Kravchenko Date: Tue, 9 Nov 2021 18:12:26 +0100 Subject: [PATCH 7/7] set different sourcePath for windows in tests Signed-off-by: Tetiana Kravchenko --- .../processor/add_kubernetes_metadata/matchers_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/filebeat/processor/add_kubernetes_metadata/matchers_test.go b/filebeat/processor/add_kubernetes_metadata/matchers_test.go index 7bbaf589a5cd..27e376c88161 100644 --- a/filebeat/processor/add_kubernetes_metadata/matchers_test.go +++ b/filebeat/processor/add_kubernetes_metadata/matchers_test.go @@ -136,7 +136,13 @@ func TestLogsPathMatcher_InvalidVarLogPodIDFormat(t *testing.T) { func TestLogsPathMatcher_ValidVarLogPod(t *testing.T) { cfgLogsPath := "/var/log/pods/" cfgResourceType := "pod" - source := 
fmt.Sprintf("/var/log/pods/namespace_pod-name_%s/container/0.log", puid) + sourcePath := "/var/log/pods/namespace_pod-name_%s/container/0.log" + + if runtime.GOOS == "windows" { + cfgLogsPath = "C:\\var\\log\\pods\\" + sourcePath = "C:\\var\\log\\pods\\namespace_pod-name_%s\\container\\0.log" + } + source := fmt.Sprintf(sourcePath, puid) expectedResult := puid executeTestWithResourceType(t, cfgLogsPath, cfgResourceType, source, expectedResult) }