Skip to content

Commit

Permalink
perf: use concurrent map when storing metrics
Browse files Browse the repository at this point in the history
In busy/large clusters, this prevents request timeouts caused by
long-lived locks: writing to the map took overly long and blocked the
metrics-reading thread, and because the lock was not released in a
timely manner, the request timed out.

Inspired by previous PR at #1028
  • Loading branch information
rarruda committed Sep 25, 2024
1 parent f50205a commit 3e2d1e9
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 31 deletions.
23 changes: 5 additions & 18 deletions pkg/metrics_store/metrics_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ import (
"sync"

"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/types"

"k8s.io/kube-state-metrics/v2/pkg/metric"
)

Expand All @@ -33,7 +31,7 @@ type MetricsStore struct {
// metric families, containing a slice of metrics. We need to keep metrics
// grouped by metric families in order to zip families with their help text in
// MetricsStore.WriteAll().
metrics map[types.UID][][]byte
metrics sync.Map

// generateMetricsFunc generates metrics based on a given Kubernetes object
// and returns them grouped by metric family.
Expand All @@ -42,17 +40,14 @@ type MetricsStore struct {
// later on zipped with their corresponding metric families in
// MetricStore.WriteAll().
headers []string

// Protects metrics
mutex sync.RWMutex
}

// NewMetricsStore returns a new MetricsStore
func NewMetricsStore(headers []string, generateFunc func(interface{}) []metric.FamilyInterface) *MetricsStore {
return &MetricsStore{
generateMetricsFunc: generateFunc,
headers: headers,
metrics: map[types.UID][][]byte{},
metrics: sync.Map{},
}
}

Expand All @@ -66,17 +61,14 @@ func (s *MetricsStore) Add(obj interface{}) error {
return err
}

s.mutex.Lock()
defer s.mutex.Unlock()

families := s.generateMetricsFunc(obj)
familyStrings := make([][]byte, len(families))

for i, f := range families {
familyStrings[i] = f.ByteSlice()
}

s.metrics[o.GetUID()] = familyStrings
s.metrics.Store(o.GetUID(), familyStrings)

return nil
}
Expand All @@ -95,10 +87,7 @@ func (s *MetricsStore) Delete(obj interface{}) error {
return err
}

s.mutex.Lock()
defer s.mutex.Unlock()

delete(s.metrics, o.GetUID())
s.metrics.Delete(o.GetUID())

return nil
}
Expand Down Expand Up @@ -126,9 +115,7 @@ func (s *MetricsStore) GetByKey(_ string) (item interface{}, exists bool, err er
// Replace will delete the contents of the store, using instead the
// given list.
func (s *MetricsStore) Replace(list []interface{}, _ string) error {
s.mutex.Lock()
s.metrics = map[types.UID][][]byte{}
s.mutex.Unlock()
s.metrics.Clear()

for _, o := range list {
err := s.Add(o)
Expand Down
30 changes: 17 additions & 13 deletions pkg/metrics_store/metrics_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,31 +56,35 @@ func (m MetricsWriter) WriteAll(w io.Writer) error {
return nil
}

for _, s := range m.stores {
s.mutex.RLock()
defer func(s *MetricsStore) {
s.mutex.RUnlock()
}(s)
}

for i, help := range m.stores[0].headers {
if help != "" && help != "\n" {
help += "\n"
}

if len(m.stores[0].metrics) > 0 {
_, err := w.Write([]byte(help))
var err error
m.stores[0].metrics.Range(func(key interface{}, value interface{}) bool {

Check failure on line 65 in pkg/metrics_store/metrics_writer.go

View workflow job for this annotation

GitHub Actions / ci-go-lint

unused-parameter: parameter 'key' seems to be unused, consider removing or renaming it as _ (revive)
_, err = w.Write([]byte(help))
if err != nil {
return fmt.Errorf("failed to write help text: %v", err)
err = fmt.Errorf("failed to write help text: %v", err)
}
return false
})
if err != nil {
return err
}

for _, s := range m.stores {
for _, metricFamilies := range s.metrics {
_, err := w.Write(metricFamilies[i])
s.metrics.Range(func(key interface{}, value interface{}) bool {

Check failure on line 77 in pkg/metrics_store/metrics_writer.go

View workflow job for this annotation

GitHub Actions / ci-go-lint

unused-parameter: parameter 'key' seems to be unused, consider removing or renaming it as _ (revive)
metricFamilies := value.([][]byte)
_, err = w.Write(metricFamilies[i])
if err != nil {
return fmt.Errorf("failed to write metrics family: %v", err)
err = fmt.Errorf("failed to write metrics family: %v", err)
return false
}
return true
})
if err != nil {
return err
}
}
}
Expand Down

0 comments on commit 3e2d1e9

Please sign in to comment.