From 3fbc93ee5241c2460aa3a5cbbc5e9663b676e0d4 Mon Sep 17 00:00:00 2001 From: Daxin Wang <46807570+dxsup@users.noreply.github.com> Date: Thu, 1 Dec 2022 19:20:21 +0800 Subject: [PATCH] Add a new clustering method "blank" (#372) Signed-off-by: Daxin Wang <daxinwang@harmonycloud.cn> --- CHANGELOG.md | 2 +- collector/docker/kindling-collector-config.yml | 3 ++- .../analyzer/network/protocol/http/http_parser.go | 10 +--------- collector/pkg/urlclustering/blank.go | 14 ++++++++++++++ collector/pkg/urlclustering/factory.go | 14 ++++++++++++++ deploy/agent/kindling-collector-config.yml | 3 ++- 6 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 collector/pkg/urlclustering/blank.go create mode 100644 collector/pkg/urlclustering/factory.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 0eee52d26..6aae0b8c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ ### Enhancements - Add payload for all protocols.([#375](https://github.com/KindlingProject/kindling/pull/375)) - -- +- Add a new clustering method "blank" that is used to reduce the cardinality of metrics as much as possible. ([#372](https://github.com/KindlingProject/kindling/pull/372)) ### Bug fixes - diff --git a/collector/docker/kindling-collector-config.yml b/collector/docker/kindling-collector-config.yml index 42eb4e8cd..d28d55648 100644 --- a/collector/docker/kindling-collector-config.yml +++ b/collector/docker/kindling-collector-config.yml @@ -56,10 +56,11 @@ analyzers: protocol_parser: [ http, mysql, dns, redis, kafka, rocketmq ] # Which URL clustering method should be used to shorten the URL of HTTP request. # This is useful for decrease the cardinality of URLs. - # Valid values: ["noparam", "alphabet"] + # Valid values: ["noparam", "alphabet", "blank"] # - noparam: Only trim the trailing parameters behind the character '?' # - alphabet: Trim the trailing parameters and Convert the segments # containing non-alphabetical characters to star(*) + # - blank: Turn endpoints to empty. 
This is used to reduce the cardinality as much as possible. url_clustering_method: alphabet # If the destination port of data is one of the followings, the protocol of such network request # is set to the corresponding one. Note the program will try to identify the protocol automatically diff --git a/collector/pkg/component/analyzer/network/protocol/http/http_parser.go b/collector/pkg/component/analyzer/network/protocol/http/http_parser.go index f1e614a37..ff8af4c58 100644 --- a/collector/pkg/component/analyzer/network/protocol/http/http_parser.go +++ b/collector/pkg/component/analyzer/network/protocol/http/http_parser.go @@ -8,15 +8,7 @@ import ( ) func NewHttpParser(urlClusteringMethod string) *protocol.ProtocolParser { - var method urlclustering.ClusteringMethod - switch urlClusteringMethod { - case "alphabet": - method = urlclustering.NewAlphabeticalClusteringMethod() - case "noparam": - method = urlclustering.NewNoParamClusteringMethod() - default: - method = urlclustering.NewAlphabeticalClusteringMethod() - } + method := urlclustering.NewMethod(urlClusteringMethod) requestParser := protocol.CreatePkgParser(fastfailHttpRequest(), parseHttpRequest(method)) responseParser := protocol.CreatePkgParser(fastfailHttpResponse(), parseHttpResponse()) diff --git a/collector/pkg/urlclustering/blank.go b/collector/pkg/urlclustering/blank.go new file mode 100644 index 000000000..d8a2b4685 --- /dev/null +++ b/collector/pkg/urlclustering/blank.go @@ -0,0 +1,14 @@ +package urlclustering + +// BlankClusteringMethod removes the endpoint and returns an empty string. +// This method is used to reduce the cardinality as much as possible. 
+type BlankClusteringMethod struct { +} + +func NewBlankClusteringMethod() ClusteringMethod { + return &BlankClusteringMethod{} +} + +func (m *BlankClusteringMethod) Clustering(_ string) string { + return "" +} diff --git a/collector/pkg/urlclustering/factory.go b/collector/pkg/urlclustering/factory.go new file mode 100644 index 000000000..ead91dbc5 --- /dev/null +++ b/collector/pkg/urlclustering/factory.go @@ -0,0 +1,14 @@ +package urlclustering + +func NewMethod(urlClusteringMethod string) ClusteringMethod { + switch urlClusteringMethod { + case "alphabet": + return NewAlphabeticalClusteringMethod() + case "noparam": + return NewNoParamClusteringMethod() + case "blank": + return NewBlankClusteringMethod() + default: + return NewAlphabeticalClusteringMethod() + } +} diff --git a/deploy/agent/kindling-collector-config.yml b/deploy/agent/kindling-collector-config.yml index 6247c67c6..4ea2e82ac 100644 --- a/deploy/agent/kindling-collector-config.yml +++ b/deploy/agent/kindling-collector-config.yml @@ -56,10 +56,11 @@ analyzers: protocol_parser: [ http, mysql, dns, redis, kafka, rocketmq ] # Which URL clustering method should be used to shorten the URL of HTTP request. # This is useful for decrease the cardinality of URLs. - # Valid values: ["noparam", "alphabet"] + # Valid values: ["noparam", "alphabet", "blank"] # - noparam: Only trim the trailing parameters behind the character '?' # - alphabet: Trim the trailing parameters and Convert the segments # containing non-alphabetical characters to star(*) + # - blank: Turn endpoints to empty. This is used to reduce the cardinality as much as possible. url_clustering_method: alphabet # If the destination port of data is one of the followings, the protocol of such network request # is set to the corresponding one. Note the program will try to identify the protocol automatically