diff --git a/plugins/inputs/ipmi_sensor/README.md b/plugins/inputs/ipmi_sensor/README.md index e47a72789e7c4..41f9fb6848b89 100644 --- a/plugins/inputs/ipmi_sensor/README.md +++ b/plugins/inputs/ipmi_sensor/README.md @@ -44,42 +44,48 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ```toml @sample.conf # Read metrics from the bare metal servers via IPMI [[inputs.ipmi_sensor]] - ## optionally specify the path to the ipmitool executable + ## Specify the path to the ipmitool executable # path = "/usr/bin/ipmitool" - ## + + ## Use sudo ## Setting 'use_sudo' to true will make use of sudo to run ipmitool. ## Sudo must be configured to allow the telegraf user to run ipmitool ## without a password. # use_sudo = false - ## - ## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR - # privilege = "ADMINISTRATOR" - ## - ## optionally specify one or more servers via a url matching + + ## Servers + ## Specify one or more servers via a url. If no servers are specified, local + ## machine sensor stats will be queried. Uses the format: ## [username[:password]@][protocol[(address)]] - ## e.g. - ## root:passwd@lan(127.0.0.1) - ## - ## if no servers are specified, local machine sensor stats will be queried - ## + ## e.g. root:passwd@lan(127.0.0.1) # servers = ["USERID:PASSW0RD@lan(192.168.1.1)"] - ## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid - ## gaps or overlap in pulled data - interval = "30s" + ## Session privilege level + ## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR + # privilege = "ADMINISTRATOR" + + ## Timeout + ## Timeout for the ipmitool command to complete. + # timeout = "20s" - ## Timeout for the ipmitool command to complete. Default is 20 seconds. - timeout = "20s" + ## Metric schema version + ## See the plugin readme for more information on schema versioning. 
+ # metric_version = 1 - ## Schema Version: (Optional, defaults to version 1) - metric_version = 2 + ## Sensors to collect + ## Choose from: + ## * sdr: default, collects sensor data records + ## * chassis_power_status: collects the power status of the chassis + ## * dcmi_power_reading: collects the power readings from the Data Center Management Interface + # sensors = ["sdr"] + ## Hex key ## Optionally provide the hex key for the IMPI connection. # hex_key = "" + ## Cache ## If ipmitool should use a cache - ## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04) - ## the cache file may not work well for you if some sensors come up late + ## Using a cache can speed up collection times depending on your device. # use_cache = false ## Path to the ipmitools cache file (defaults to OS temp dir) @@ -87,6 +93,17 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. # cache_path = "" ``` +## Sensors + +By default the plugin collects data via the `sdr` command and returns those +values. However, there are additional sensor options that can be called on: + +- `chassis_power_status` - returns 0 or 1 depending on the output of + `chassis power status` +- `dcmi_power_reading` - returns the watt values from `dcmi power reading` + +These sensor options are not affected by the metric version. 
+ ## Metrics Version 1 schema: diff --git a/plugins/inputs/ipmi_sensor/ipmi_sensor.go b/plugins/inputs/ipmi_sensor/ipmi_sensor.go index 025740050a1f5..ab02bab45da96 100644 --- a/plugins/inputs/ipmi_sensor/ipmi_sensor.go +++ b/plugins/inputs/ipmi_sensor/ipmi_sensor.go @@ -19,6 +19,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/internal/choice" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -31,21 +32,22 @@ var ( reV2ParseLine = regexp.MustCompile(`^(?P[^|]*)\|[^|]+\|(?P[^|]*)\|(?P[^|]*)\|(?:(?P[^|]+))?`) reV2ParseDescription = regexp.MustCompile(`^(?P-?[0-9.]+)\s(?P.*)|(?P.+)|^$`) reV2ParseUnit = regexp.MustCompile(`^(?P[^,]+)(?:,\s*(?P.*))?`) + dcmiPowerReading = regexp.MustCompile(`^(?P[^|]*)\:(?P.* Watts)?`) ) // Ipmi stores the configuration values for the ipmi_sensor input plugin type Ipmi struct { - Path string - Privilege string - HexKey string `toml:"hex_key"` - Servers []string - Timeout config.Duration - MetricVersion int - UseSudo bool - UseCache bool - CachePath string - - Log telegraf.Logger `toml:"-"` + Path string `toml:"path"` + Privilege string `toml:"privilege"` + HexKey string `toml:"hex_key"` + Servers []string `toml:"servers"` + Sensors []string `toml:"sensors"` + Timeout config.Duration `toml:"timeout"` + MetricVersion int `toml:"metric_version"` + UseSudo bool `toml:"use_sudo"` + UseCache bool `toml:"use_cache"` + CachePath string `toml:"cache_path"` + Log telegraf.Logger `toml:"-"` } const cmd = "ipmitool" @@ -66,6 +68,12 @@ func (m *Ipmi) Init() error { if m.CachePath == "" { m.CachePath = os.TempDir() } + if len(m.Sensors) == 0 { + m.Sensors = []string{"sdr"} + } + if err := choice.CheckSlice(m.Sensors, []string{"sdr", "chassis_power_status", "dcmi_power_reading"}); err != nil { + return err + } // Check parameters if m.Path == "" { @@ -87,24 +95,37 @@ func (m *Ipmi) Gather(acc telegraf.Accumulator) error { wg.Add(1) go 
func(a telegraf.Accumulator, s string) { defer wg.Done() - err := m.parse(a, s) - if err != nil { - a.AddError(err) + for _, sensor := range m.Sensors { + a.AddError(m.parse(a, s, sensor)) } }(acc, server) } wg.Wait() } else { - err := m.parse(acc, "") - if err != nil { - return err + for _, sensor := range m.Sensors { + err := m.parse(acc, "", sensor) + if err != nil { + return err + } } } return nil } -func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { +func (m *Ipmi) parse(acc telegraf.Accumulator, server string, sensor string) error { + var command []string + switch sensor { + case "sdr": + command = append(command, "sdr") + case "chassis_power_status": + command = append(command, "chassis", "power", "status") + case "dcmi_power_reading": + command = append(command, "dcmi", "power", "reading") + default: + return fmt.Errorf("unknown sensor type %q", sensor) + } + opts := make([]string, 0) hostname := "" if server != "" { @@ -112,7 +133,9 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { hostname = conn.Hostname opts = conn.options() } - opts = append(opts, "sdr") + + opts = append(opts, command...) 
+ if m.UseCache { cacheFile := filepath.Join(m.CachePath, server+"_ipmi_cache") _, err := os.Stat(cacheFile) @@ -134,7 +157,7 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { } opts = append(opts, "-S", cacheFile) } - if m.MetricVersion == 2 { + if m.MetricVersion == 2 && sensor == "sdr" { opts = append(opts, "elist") } name := m.Path @@ -149,10 +172,78 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { if err != nil { return fmt.Errorf("failed to run command %q: %w - %s", strings.Join(sanitizeIPMICmd(cmd.Args), " "), err, string(out)) } - if m.MetricVersion == 2 { - return m.parseV2(acc, hostname, out, timestamp) + + switch sensor { + case "sdr": + if m.MetricVersion == 2 { + return m.parseV2(acc, hostname, out, timestamp) + } else { + return m.parseV1(acc, hostname, out, timestamp) + } + case "chassis_power_status": + return m.parseChassisPowerStatus(acc, hostname, out, timestamp) + case "dcmi_power_reading": + return m.parseDCMIPowerReading(acc, hostname, out, timestamp) + } + + return fmt.Errorf("unknown sensor type %q", sensor) +} + +func (m *Ipmi) parseChassisPowerStatus(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error { + // each line will look something like + // Chassis Power is on + // Chassis Power is off + scanner := bufio.NewScanner(bytes.NewReader(cmdOut)) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "Chassis Power is on") { + acc.AddFields("ipmi_sensor", map[string]interface{}{"value": 1}, map[string]string{"name": "chassis_power_status", "server": hostname}, measuredAt) + } else if strings.Contains(line, "Chassis Power is off") { + acc.AddFields("ipmi_sensor", map[string]interface{}{"value": 0}, map[string]string{"name": "chassis_power_status", "server": hostname}, measuredAt) + } + } + + return scanner.Err() +} + +func (m *Ipmi) parseDCMIPowerReading(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error { 
+ // each line will look something like + // Current Power Reading : 0.000 + scanner := bufio.NewScanner(bytes.NewReader(cmdOut)) + for scanner.Scan() { + ipmiFields := m.extractFieldsFromRegex(dcmiPowerReading, scanner.Text()) + if len(ipmiFields) != 2 { + continue + } + + tags := map[string]string{ + "name": transform(ipmiFields["name"]), + } + + // tag the server is we have one + if hostname != "" { + tags["server"] = hostname + } + + fields := make(map[string]interface{}) + valunit := strings.Split(ipmiFields["value"], " ") + if len(valunit) != 2 { + continue + } + + var err error + fields["value"], err = aToFloat(valunit[0]) + if err != nil { + continue + } + if len(valunit) > 1 { + tags["unit"] = transform(valunit[1]) + } + + acc.AddFields("ipmi_sensor", fields, tags, measuredAt) } - return m.parseV1(acc, hostname, out, timestamp) + + return scanner.Err() } func (m *Ipmi) parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error { diff --git a/plugins/inputs/ipmi_sensor/ipmi_sensor_test.go b/plugins/inputs/ipmi_sensor/ipmi_sensor_test.go index 77bed9ddcd913..f2fd4734a9044 100644 --- a/plugins/inputs/ipmi_sensor/ipmi_sensor_test.go +++ b/plugins/inputs/ipmi_sensor/ipmi_sensor_test.go @@ -771,6 +771,128 @@ func Test_parseV2(t *testing.T) { } } +func Test_parsePowerStatus(t *testing.T) { + type args struct { + hostname string + cmdOut []byte + measuredAt time.Time + } + tests := []struct { + name string + args args + expected []telegraf.Metric + }{ + { + name: "Test correct parse power status off", + args: args{ + hostname: "host", + cmdOut: []byte("Chassis Power is off"), + measuredAt: time.Now(), + }, + expected: []telegraf.Metric{ + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "chassis_power_status", + "server": "host", + }, + map[string]interface{}{"value": 0}, + time.Unix(0, 0), + ), + }, + }, + { + name: "Test correct parse power status on", + args: args{ + hostname: "host", + cmdOut: []byte("Chassis 
Power is on"), + measuredAt: time.Now(), + }, + expected: []telegraf.Metric{ + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "chassis_power_status", + "server": "host", + }, + map[string]interface{}{"value": 1}, + time.Unix(0, 0), + ), + }, + }, + } + + ipmi := &Ipmi{ + Log: testutil.Logger{}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var acc testutil.Accumulator + err := ipmi.parseChassisPowerStatus(&acc, tt.args.hostname, tt.args.cmdOut, tt.args.measuredAt) + require.NoError(t, err) + testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime()) + }) + } +} + +func Test_parsePowerReading(t *testing.T) { + output := `Instantaneous power reading: 167 Watts +Minimum during sampling period: 124 Watts +Maximum during sampling period: 422 Watts +Average power reading over sample period: 156 Watts +IPMI timestamp: Mon Aug 1 21:22:51 2016 +Sampling period: 00699043 Seconds. +Power reading state is: activated +` + + expected := []telegraf.Metric{ + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "instantaneous_power_reading", + "server": "host", + "unit": "watts", + }, + map[string]interface{}{"value": float64(167)}, + time.Unix(0, 0), + ), + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "minimum_during_sampling_period", + "server": "host", + "unit": "watts", + }, + map[string]interface{}{"value": float64(124)}, + time.Unix(0, 0), + ), + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "maximum_during_sampling_period", + "server": "host", + "unit": "watts", + }, + map[string]interface{}{"value": float64(422)}, + time.Unix(0, 0), + ), + testutil.MustMetric("ipmi_sensor", + map[string]string{ + "name": "average_power_reading_over_sample_period", + "server": "host", + "unit": "watts", + }, + map[string]interface{}{"value": float64(156)}, + time.Unix(0, 0), + ), + } + + ipmi := &Ipmi{ + Log: testutil.Logger{}, + } + + var acc 
testutil.Accumulator + err := ipmi.parseDCMIPowerReading(&acc, "host", []byte(output), time.Now()) + require.NoError(t, err) + testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime()) +} + func TestSanitizeIPMICmd(t *testing.T) { tests := []struct { name string diff --git a/plugins/inputs/ipmi_sensor/sample.conf b/plugins/inputs/ipmi_sensor/sample.conf index 3cabeb204d9f9..8964849a0d823 100644 --- a/plugins/inputs/ipmi_sensor/sample.conf +++ b/plugins/inputs/ipmi_sensor/sample.conf @@ -1,41 +1,47 @@ # Read metrics from the bare metal servers via IPMI [[inputs.ipmi_sensor]] - ## optionally specify the path to the ipmitool executable + ## Specify the path to the ipmitool executable # path = "/usr/bin/ipmitool" - ## + + ## Use sudo ## Setting 'use_sudo' to true will make use of sudo to run ipmitool. ## Sudo must be configured to allow the telegraf user to run ipmitool ## without a password. # use_sudo = false - ## - ## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR - # privilege = "ADMINISTRATOR" - ## - ## optionally specify one or more servers via a url matching + + ## Servers + ## Specify one or more servers via a url. If no servers are specified, local + ## machine sensor stats will be queried. Uses the format: ## [username[:password]@][protocol[(address)]] - ## e.g. - ## root:passwd@lan(127.0.0.1) - ## - ## if no servers are specified, local machine sensor stats will be queried - ## + ## e.g. root:passwd@lan(127.0.0.1) # servers = ["USERID:PASSW0RD@lan(192.168.1.1)"] - ## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid - ## gaps or overlap in pulled data - interval = "30s" + ## Session privilege level + ## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR + # privilege = "ADMINISTRATOR" + + ## Timeout + ## Timeout for the ipmitool command to complete. + # timeout = "20s" - ## Timeout for the ipmitool command to complete. Default is 20 seconds. 
- timeout = "20s" + ## Metric schema version + ## See the plugin readme for more information on schema versioning. + # metric_version = 1 - ## Schema Version: (Optional, defaults to version 1) - metric_version = 2 + ## Sensors to collect + ## Choose from: + ## * sdr: default, collects sensor data records + ## * chassis_power_status: collects the power status of the chassis + ## * dcmi_power_reading: collects the power readings from the Data Center Management Interface + # sensors = ["sdr"] + ## Hex key ## Optionally provide the hex key for the IMPI connection. # hex_key = "" + ## Cache ## If ipmitool should use a cache - ## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04) - ## the cache file may not work well for you if some sensors come up late + ## Using a cache can speed up collection times depending on your device. # use_cache = false ## Path to the ipmitools cache file (defaults to OS temp dir)