Skip to content

Commit

Permalink
add node numa metric emitter and support memory bandwidth related metric
Browse files Browse the repository at this point in the history
  • Loading branch information
luomingmeng committed Dec 16, 2024
1 parent c1ff5df commit 1774931
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type MetricEmitterPluginOptions struct {

NodeMetricLabels []string
NodeMetricMapping map[string]string
NUMAMetricMapping map[string]string

MetricSyncers []string
}
Expand Down Expand Up @@ -74,6 +75,8 @@ func (o *MetricEmitterPluginOptions) AddFlags(fss *cliflag.NamedFlagSets) {
"node labels to be added in metric selector lists")
fs.StringToStringVar(&o.NodeMetricMapping, "metric-node-metrics", o.NodeMetricMapping,
"the metric name for node-level to override the default collecting logic")
fs.StringToStringVar(&o.NUMAMetricMapping, "metric-node-numa-metrics", o.NUMAMetricMapping,
"the metric name for node-numa-level to override the default collecting logic")

fs.StringSliceVar(&o.MetricSyncers, "metric-syncers", o.MetricSyncers,
"those syncers that should be enabled")
Expand Down Expand Up @@ -106,6 +109,10 @@ func (o *MetricEmitterPluginOptions) ApplyTo(c *metricemitter.MetricEmitterPlugi
c.MetricEmitterNodeConfiguration.MetricMapping = o.NodeMetricMapping
}

if o.NUMAMetricMapping != nil && len(o.NUMAMetricMapping) != 0 {
c.MetricEmitterNodeConfiguration.NUMAMetricMapping = o.NUMAMetricMapping
}

c.MetricSyncers = o.MetricSyncers
return nil
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ require (
)

replace (
github.com/kubewharf/katalyst-api => github.com/luomingmeng/katalyst-api v0.0.0-20241211184538-e538b25e3c9e
k8s.io/api => k8s.io/api v0.24.6
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=
github.com/luomingmeng/katalyst-api v0.0.0-20241211184538-e538b25e3c9e h1:Lh1j8V/PQeGDvPHI7OG0KyArQXK3N5sJCYoIL38O22U=
github.com/luomingmeng/katalyst-api v0.0.0-20241211184538-e538b25e3c9e/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
63 changes: 59 additions & 4 deletions pkg/agent/sysadvisor/plugin/metric-emitter/syncer/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,16 @@ var nodeRawMetricNameMapping = map[string]string{
consts.MetricMemAvailableSystem: apimetricnode.CustomMetricNodeMemoryAvailable,
}

// nodeCachedMetricNameMapping maps the cached metricName (processed by plugin.SysAdvisorPlugin)
// nodeNUMARawMetricNameMapping maps the raw metricName (collected from agent.MetricsFetcher)
// to the standard metricName (used by custom-metric-api-server)
var nodeNUMARawMetricNameMapping = map[string]string{
consts.MetricMemBandwidthReadNuma: apimetricnode.CustomMetricNUMAMemoryBandwidthRead,
consts.MetricMemBandwidthWriteNuma: apimetricnode.CustomMetricNUMAMemoryBandwidthWrite,
}

type MetricSyncerNode struct {
metricMapping map[string]string
metricMapping map[string]string
numaMetricMapping map[string]string

conf *metricemitter.MetricEmitterPluginConfiguration
node *v1.Node
Expand All @@ -79,9 +84,11 @@ func NewMetricSyncerNode(conf *config.Configuration, _ interface{},
}

metricMapping := general.MergeMap(nodeRawMetricNameMapping, conf.MetricEmitterNodeConfiguration.MetricMapping)
numaMetricMapping := general.MergeMap(nodeNUMARawMetricNameMapping, conf.MetricEmitterNodeConfiguration.NUMAMetricMapping)

return &MetricSyncerNode{
metricMapping: metricMapping,
metricMapping: metricMapping,
numaMetricMapping: numaMetricMapping,

conf: conf.AgentConfiguration.MetricEmitterPluginConfiguration,

Expand All @@ -98,16 +105,29 @@ func (n *MetricSyncerNode) Name() string {

func (n *MetricSyncerNode) Run(ctx context.Context) {
rChan := make(chan metrictypes.NotifiedResponse, 20)
rNUMAChan := make(chan metrictypes.NotifiedResponse, 20)
go n.receiveRawNode(ctx, rChan)
go wait.Until(func() { n.advisorMetric(ctx) }, time.Second*3, ctx.Done())
go n.receiveRawNodeNUMA(ctx, rNUMAChan)

go wait.Until(func() { n.advisorMetric(ctx) }, time.Second*3, ctx.Done())
// there is no need to deRegister for node-related metric
for rawMetricName := range n.metricMapping {
klog.Infof("register raw node metric: %v", rawMetricName)
n.metaServer.MetricsFetcher.RegisterNotifier(metrictypes.MetricsScopeNode, metrictypes.NotifiedRequest{
MetricName: rawMetricName,
}, rChan)
}

// there is no need to deRegister for numa-related metric
for rawNUMAMetricName := range n.numaMetricMapping {
for _, numaID := range n.metaServer.CPUDetails.NUMANodes().ToSliceNoSortInt() {
klog.Infof("register raw node numa metric: %v, numa: %d", rawNUMAMetricName, numaID)
n.metaServer.MetricsFetcher.RegisterNotifier(metrictypes.MetricsScopeNuma, metrictypes.NotifiedRequest{
MetricName: rawNUMAMetricName,
NumaID: numaID,
}, rChan)
}
}
}

// receiveRawNode receives notified response from raw data source
Expand Down Expand Up @@ -142,6 +162,41 @@ func (n *MetricSyncerNode) receiveRawNode(ctx context.Context, rChan chan metric
}
}

func (n *MetricSyncerNode) receiveRawNodeNUMA(ctx context.Context, rChan chan metrictypes.NotifiedResponse) {
for {
select {
case response := <-rChan:
if response.Req.MetricName == "" {
continue
} else if response.Time == nil {
continue
}

targetMetricName, ok := n.numaMetricMapping[response.Req.MetricName]
if !ok {
klog.Warningf("invalid npde numa raw metric name: %v", response.Req.MetricName)
continue
}

klog.V(4).Infof("get metric %v for node, numa %v", response.Req.MetricName, response.Req.NumaNode)
if tags := n.generateMetricTag(ctx); len(tags) > 0 {
_ = n.dataEmitter.StoreFloat64(targetMetricName, response.Value, metrics.MetricTypeNameRaw, append(tags,
metrics.MetricTag{
Key: fmt.Sprintf("%s", data.CustomMetricLabelKeyTimestamp),
Val: fmt.Sprintf("%v", response.Time.UnixMilli()),
},
metrics.MetricTag{
Key: fmt.Sprintf("%snuma", data.CustomMetricLabelSelectorPrefixKey),
Val: response.Req.NumaNode,
})...)
}
case <-ctx.Done():
klog.Infof("metric emitter for node numa has been stopped")
return
}
}
}

// generateMetricTag generates tags that are bounded to current Node object
func (n *MetricSyncerNode) generateMetricTag(ctx context.Context) (tags []metrics.MetricTag) {
if n.node == nil && n.metaServer != nil && n.metaServer.NodeFetcher != nil {
Expand Down
6 changes: 4 additions & 2 deletions pkg/agent/sysadvisor/plugin/metric-emitter/syncer/pod/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ var podRawMetricNameMapping = map[string]string{
consts.MetricCPUUsageRatioContainer: apimetricpod.CustomMetricPodCPUUsageRatio,
consts.MetricCPUCPIContainer: apimetricpod.CustomMetricPodCPUCPI,

consts.MetricMemRssContainer: apimetricpod.CustomMetricPodMemoryRSS,
consts.MetricMemUsageContainer: apimetricpod.CustomMetricPodMemoryUsage,
consts.MetricMemRssContainer: apimetricpod.CustomMetricPodMemoryRSS,
consts.MetricMemUsageContainer: apimetricpod.CustomMetricPodMemoryUsage,
consts.MetricMemBandwidthReadContainer: apimetricpod.CustomMetricPodMemoryReadBandwidth,
consts.MetricMemBandwidthWriteContainer: apimetricpod.CustomMetricPodMemoryWriteBandwidth,
}

type podRawChanel struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ type MetricEmitterNodeConfiguration struct {

// MetricMapping will override the default to-collected metrics in node-level
MetricMapping map[string]string

// NUMAMetricMapping will override the default to-collected metrics in numa-level
NUMAMetricMapping map[string]string
}

type MetricEmitterExternalConfiguration struct{}
Expand Down

0 comments on commit 1774931

Please sign in to comment.