Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zfs pool stats collection. #427

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 72 additions & 7 deletions plugins/zfs/zfs.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package zfs

import (
"fmt"
"path/filepath"
"strconv"
"strings"
Expand All @@ -12,6 +13,12 @@ import (
// Zfs holds the settings of the ZFS input plugin.
type Zfs struct {
// KstatPath is the directory the kstat files are read from.
// NOTE(review): Gather appears to fall back to /proc/spl/kstat/zfs when
// this is unset — confirm against the full Gather body.
KstatPath string
// KstatMetrics names the kstat files to read below KstatPath, e.g.
// "arcstats", "zfetchstats" or "vdev_cache_stats".
KstatMetrics []string
// PoolMetrics makes Gather additionally collect the per-pool io
// statistics; it is off by default.
PoolMetrics bool
}

// poolInfo describes one ZFS pool discovered below the kstat directory.
type poolInfo struct {
// name is the pool name, taken from the directory that contains the io file.
name string
// ioFilename is the path of the pool's io statistics file.
ioFilename string
}

var sampleConfig = `
Expand All @@ -22,6 +29,9 @@ var sampleConfig = `
# By default, telegraf gather all zfs stats
# If not specified, then default is:
# kstatMetrics = ["arcstats", "zfetchstats", "vdev_cache_stats"]
#
# By default, don't gather zpool stats
# poolMetrics = false
`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep getTags and make getPoolStats a separate function

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it isn't ideal; I wanted to avoid doing the directory scan a second time for each Gather call.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I see what you mean, one thing you could do is make tags a private variable on the Zfs struct. Then you can set z.tags["pool"] = ... within this function, and don't return a map in that case.

You can either pass in the Zfs struct or make this function have a receiver (z *Zfs)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about putting the directory scan into a getPools() that returns a slice of structs containing the pool name and io path. Then the getPoolsTag() and gatherPoolStats() will both take this as the input. Do you like that better than adding a tags property to Zfs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@allenpetersen that works as well, 👍


func (z *Zfs) SampleConfig() string {
Expand All @@ -32,18 +42,63 @@ func (z *Zfs) Description() string {
return "Read metrics of ZFS from arcstats, zfetchstats and vdev_cache_stats"
}

func getTags(kstatPath string) map[string]string {
var pools string
func getPools(kstatPath string) []poolInfo {
pools := make([]poolInfo, 0)
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io")

for _, poolDir := range poolsDirs {
poolDirSplit := strings.Split(poolDir, "/")
pool := poolDirSplit[len(poolDirSplit)-2]
if len(pools) != 0 {
pools += "::"
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir})
}

return pools
}

// getTags builds the tag set attached to the kstat metrics: a single "pools"
// tag whose value is the names of all given pools joined with "::". With no
// pools the tag value is the empty string.
func getTags(pools []poolInfo) map[string]string {
	names := make([]string, 0, len(pools))
	for _, pool := range pools {
		names = append(names, pool.name)
	}

	return map[string]string{"pools": strings.Join(names, "::")}
}

// gatherPoolStats reads the io statistics file of a single pool and adds every
// counter found in it to acc, tagged with {"pool": <pool name>}.
//
// The file must consist of exactly three lines: a first line that is not
// used here, a line of whitespace-separated field names, and a line with the
// matching base-10 integer values.
//
// An error is returned when the file cannot be read, when it does not have
// that shape, or when a value does not parse as a 64-bit integer. Counters
// added before a parse failure stay in acc.
func gatherPoolStats(pool poolInfo, acc plugins.Accumulator) error {
	lines, err := internal.ReadLines(pool.ioFilename)
	if err != nil {
		return err
	}

	// Report an unexpected layout explicitly; err is nil at this point, so
	// returning it would silently signal success.
	if len(lines) != 3 {
		return fmt.Errorf("expected 3 lines in %s, found %d", pool.ioFilename, len(lines))
	}

	keys := strings.Fields(lines[1])
	values := strings.Fields(lines[2])

	if len(keys) != len(values) {
		return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
	}

	tag := map[string]string{"pool": pool.name}

	for i, key := range keys {
		value, err := strconv.ParseInt(values[i], 10, 64)
		if err != nil {
			return err
		}

		acc.Add(key, value, tag)
	}

	return nil
}

func (z *Zfs) Gather(acc plugins.Accumulator) error {
Expand All @@ -57,7 +112,17 @@ func (z *Zfs) Gather(acc plugins.Accumulator) error {
kstatPath = "/proc/spl/kstat/zfs"
}

tags := getTags(kstatPath)
pools := getPools(kstatPath)
tags := getTags(pools)

if z.PoolMetrics {
for _, pool := range pools {
err := gatherPoolStats(pool, acc)
if err != nil {
return err
}
}
}

for _, metric := range kstatMetrics {
lines, err := internal.ReadLines(kstatPath + "/" + metric)
Expand Down
212 changes: 163 additions & 49 deletions plugins/zfs/zfs_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package zfs

import (
"fmt"
"io/ioutil"
"os"
"testing"
Expand Down Expand Up @@ -121,6 +120,10 @@ delegations 4 0
hits 4 0
misses 4 0
`
const pool_ioContents = `11 3 0x00 1 80 2225326830828 32953476980628
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
`

var testKstatPath = os.TempDir() + "/telegraf/proc/spl/kstat/zfs"

Expand All @@ -129,6 +132,50 @@ type metrics struct {
value int64
}

func TestZfsPoolMetrics(t *testing.T) {
err := os.MkdirAll(testKstatPath, 0755)
require.NoError(t, err)

err = os.MkdirAll(testKstatPath+"/HOME", 0755)
require.NoError(t, err)

err = ioutil.WriteFile(testKstatPath+"/HOME/io", []byte(pool_ioContents), 0644)
require.NoError(t, err)

err = ioutil.WriteFile(testKstatPath+"/arcstats", []byte(arcstatsContents), 0644)
require.NoError(t, err)

poolMetrics := getPoolMetrics()

var acc testutil.Accumulator

//one pool, all metrics
tags := map[string]string{
"pool": "HOME",
}

z := &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range poolMetrics {
assert.True(t, !acc.HasIntValue(metric.name), metric.name)
assert.True(t, !acc.CheckTaggedValue(metric.name, metric.value, tags))
}

z = &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}, PoolMetrics: true}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range poolMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}

func TestZfsGeneratesMetrics(t *testing.T) {
err := os.MkdirAll(testKstatPath, 0755)
require.NoError(t, err)
Expand All @@ -148,7 +195,64 @@ func TestZfsGeneratesMetrics(t *testing.T) {
err = ioutil.WriteFile(testKstatPath+"/vdev_cache_stats", []byte(vdev_cache_statsContents), 0644)
require.NoError(t, err)

intMetrics := []*metrics{
intMetrics := getKstatMetricsAll()

var acc testutil.Accumulator

//one pool, all metrics
tags := map[string]string{
"pools": "HOME",
}

z := &Zfs{KstatPath: testKstatPath}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

//two pools, all metrics
err = os.MkdirAll(testKstatPath+"/STORAGE", 0755)
require.NoError(t, err)

err = ioutil.WriteFile(testKstatPath+"/STORAGE/io", []byte(""), 0644)
require.NoError(t, err)

tags = map[string]string{
"pools": "HOME::STORAGE",
}

z = &Zfs{KstatPath: testKstatPath}
acc = testutil.Accumulator{}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

intMetrics = getKstatMetricsArcOnly()

//two pools, one metric
z = &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}}
acc = testutil.Accumulator{}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}

func getKstatMetricsArcOnly() []*metrics {
return []*metrics{
{
name: "arcstats_hits",
value: 5968846374,
Expand Down Expand Up @@ -493,6 +597,11 @@ func TestZfsGeneratesMetrics(t *testing.T) {
name: "arcstats_arc_meta_max",
value: 18327165696,
},
}
}

func getKstatMetricsAll() []*metrics {
otherMetrics := []*metrics{
{
name: "zfetchstats_hits",
value: 7812959060,
Expand Down Expand Up @@ -551,53 +660,58 @@ func TestZfsGeneratesMetrics(t *testing.T) {
},
}

var acc testutil.Accumulator

//one pool, all metrics
tags := map[string]string{
"pools": "HOME",
}

z := &Zfs{KstatPath: testKstatPath}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range intMetrics {
fmt.Println(metric.name)
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

//two pools, all metrics
err = os.MkdirAll(testKstatPath+"/STORAGE", 0755)
require.NoError(t, err)

err = ioutil.WriteFile(testKstatPath+"/STORAGE/io", []byte(""), 0644)
require.NoError(t, err)

tags = map[string]string{
"pools": "HOME::STORAGE",
}

z = &Zfs{KstatPath: testKstatPath}
err = z.Gather(&acc)
require.NoError(t, err)

for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
}

//two pools, one metric
z = &Zfs{KstatPath: testKstatPath, KstatMetrics: []string{"arcstats"}}
err = z.Gather(&acc)
require.NoError(t, err)
return append(getKstatMetricsArcOnly(), otherMetrics...)
}

for _, metric := range intMetrics {
assert.True(t, acc.HasIntValue(metric.name), metric.name)
assert.True(t, acc.CheckTaggedValue(metric.name, metric.value, tags))
// getPoolMetrics returns the counters that gathering the pool_ioContents
// fixture is expected to produce, in the order the fields appear in the file.
func getPoolMetrics() []*metrics {
	return []*metrics{
		{name: "nread", value: 1884160},
		{name: "nwritten", value: 6450688},
		{name: "reads", value: 22},
		{name: "writes", value: 978},
		{name: "wtime", value: 272187126},
		{name: "wlentime", value: 2850519036},
		{name: "wupdate", value: 2263669418655},
		{name: "rtime", value: 424226814},
		{name: "rlentime", value: 2850519036},
		{name: "rupdate", value: 2263669871823},
		{name: "wcnt", value: 0},
		{name: "rcnt", value: 0},
	}
}