Skip to content

Commit

Permalink
Merge pull request pingcap#21 from qiuyesuifeng/tiboys/hackathon1022
Browse files Browse the repository at this point in the history
*: add system info table and tiny fix
  • Loading branch information
qiuyesuifeng authored Oct 23, 2019
2 parents 1cd0e16 + ec0397e commit adaea5c
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 30 deletions.
23 changes: 15 additions & 8 deletions executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1636,17 +1636,14 @@ func (e *TiDBInspectionExec) Next(ctx context.Context, req *chunk.Chunk) error {
req.AppendInt64(0, idx)
req.AppendString(1, fmt.Sprintf("create inspection database [%s]", e.i.GetDBName()))
if err := e.i.CreateInspectionDB(); err != nil {
req.AppendString(2, err.Error())
return errors.Trace(err)
} else {
req.AppendString(2, "OK")
}

// create inspection tables
if err := e.i.CreateInspectionTables(); err != nil {
idx++
req.AppendInt64(0, idx)
req.AppendString(1, "create inspection tables")
req.AppendString(2, err.Error())
return errors.Trace(err)
} else {
for _, table := range e.i.GetTableNames() {
idx++
Expand All @@ -1668,12 +1665,22 @@ func (e *TiDBInspectionExec) Next(ctx context.Context, req *chunk.Chunk) error {
}
*/

// fill TIDB_CLUSTER_INFO table
// generate TIDB_CLUSTER_INFO table
idx++
req.AppendInt64(0, idx)
req.AppendString(1, "fill [TIDB_CLUSTER_INFO] table")
req.AppendString(1, "generate [TIDB_CLUSTER_INFO] table")
if err := e.i.GetClusterInfo(); err != nil {
req.AppendString(2, err.Error())
return errors.Trace(err)
} else {
req.AppendString(2, "OK")
}

// generate SYSTEM_INFO table
idx++
req.AppendInt64(0, idx)
req.AppendString(1, "generate [SYSTEM_INFO] table")
if err := e.i.GetSystemInfo(); err != nil {
return errors.Trace(err)
} else {
req.AppendString(2, "OK")
}
Expand Down
17 changes: 9 additions & 8 deletions infoschema/inspection/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,18 @@ const tableTiDBClusterInfo = `CREATE TABLE %s.TIDB_CLUSTER_INFO (

// tableSystemInfo is the CREATE TABLE statement template for the SYSTEM_INFO
// table. The leading %s is a format verb placed in front of the table name
// (presumably filled with the inspection database name — confirm at the call
// site that formats this template).
// NOTE(review): the last column is spelled KERNAL (sic); renaming it would
// change the table schema, so it is left untouched here.
const tableSystemInfo = `CREATE TABLE %s.SYSTEM_INFO (
ID bigint(21) unsigned DEFAULT NULL,
TYPE varchar(64) DEFAULT NULL,
NAME varchar(64) DEFAULT NULL,
IP varchar(64) DEFAULT NULL,
Name varchar(64) DEFAULT NULL,
Type varchar(64) DEFAULT NULL,
STATUS_ADDRESS varchar(64) DEFAULT NULL,
ADDRESS varchar(64) DEFAULT NULL,
CPU varchar(64) DEFAULT NULL,
CPU_USAGE double DEFAULT NULL,
CPU_USAGE varchar(64) DEFAULT NULL,
MEMORY varchar(64) DEFAULT NULL,
MEMORY_USAGE double DEFAULT NULL,
VERSION varchar(64) DEFAULT NULL,
OS_VERSION varchar(128) DEFAULT NULL,
KERNAL_VERSION varchar(128) DEFAULT NULL
MEMORY_USAGE varchar(64) DEFAULT NULL,
LOAD1 varchar(64) DEFAULT NULL,
LOAD5 varchar(64) DEFAULT NULL,
LOAD15 varchar(64) DEFAULT NULL,
KERNAL varchar(128) DEFAULT NULL
)`

const tableTiDBKeyMetrcisInfo = `CREATE TABLE %s.TIDB_KEY_METRICS_INFO (
Expand Down
178 changes: 164 additions & 14 deletions infoschema/inspection/inspection.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,38 @@ import (
"github.com/pingcap/parser"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/model"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/domain/infosync"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/store/helper"
"github.com/pingcap/tidb/store/tikv"
"github.com/pingcap/tidb/util"
"github.com/pingcap/tidb/util/sqlexec"
"github.com/prometheus/client_golang/api"
"github.com/prometheus/client_golang/api/prometheus/v1"
pmodel "github.com/prometheus/common/model"
)

// promReadTimeout is the deadline applied to the context used for Prometheus
// API calls (target discovery and the per-item metric queries).
const promReadTimeout = 10 * time.Second

// NewInspectionHelper returns an InspectionHelper bound to the given session
// context. It creates a fresh parser, derives the database name from the
// "TIDB_INSPECTION" prefix plus the current time formatted with the
// "20060102150405" layout, and starts with empty table names, cluster items
// and node exporter map.
func NewInspectionHelper(ctx sessionctx.Context) *InspectionHelper {
return &InspectionHelper{
ctx: ctx,
p: parser.New(),
dbName: fmt.Sprintf("%s_%s", "TIDB_INSPECTION", time.Now().Format("20060102150405")),
tableNames: []string{},
items: []ClusterItem{},
ctx: ctx,
p: parser.New(),
dbName: fmt.Sprintf("%s_%s", "TIDB_INSPECTION", time.Now().Format("20060102150405")),
tableNames: []string{},
items: []ClusterItem{},
nodeExporters: make(map[string]string),
}
}

// ClusterItem describes one cluster component recorded by GetClusterInfo:
// a numeric ID plus its type, name, IP and address strings.
type ClusterItem struct {
ID int64
IP string
Type string
Name string
IP string
Address string
Type string
}

type InspectionHelper struct {
Expand All @@ -44,8 +51,10 @@ type InspectionHelper struct {
dbName string
tableNames []string

items []ClusterItem
isInit bool
items []ClusterItem
isInit bool
nodeExporters map[string]string
promClient api.Client
}

func getIPfromAdress(address string) string {
Expand Down Expand Up @@ -143,7 +152,7 @@ func (i *InspectionHelper) GetClusterInfo() error {
return errors.Trace(err)
}

i.items = append(i.items, ClusterItem{int64(idx), getIPfromAdress(tidbStatusAddr), name, tidbStatusAddr, "tidb"})
i.items = append(i.items, ClusterItem{int64(idx), tp, name, item.IP, tidbAddr})
idx++
}

Expand Down Expand Up @@ -201,7 +210,7 @@ func (i *InspectionHelper) GetClusterInfo() error {
return errors.Trace(err)
}

i.items = append(i.items, ClusterItem{int64(idx), getIPfromAdress(host), name, host, tp})
i.items = append(i.items, ClusterItem{int64(idx), tp, name, getIPfromAdress(host), host})
idx++
}

Expand All @@ -223,21 +232,162 @@ func (i *InspectionHelper) GetClusterInfo() error {
return errors.Trace(err)
}

i.items = append(i.items, ClusterItem{int64(idx), getIPfromAdress(storeStat.Store.StatusAddress), name, storeStat.Store.StatusAddress, tp})
i.items = append(i.items, ClusterItem{int64(idx), tp, name, getIPfromAdress(storeStat.Store.Address), storeStat.Store.Address})
idx++
}

i.isInit = true
return nil
}

// initProm lazily creates the Prometheus API client and discovers the node
// exporters known to that Prometheus instance.
//
// It fails if GetClusterInfo has not completed yet (isInit is false) or if the
// global config carries no Prometheus address. Every active target whose
// "group" label equals "node_exporter" and whose "instance" label is non-empty
// is stored in nodeExporters, keyed by the IP extracted from the instance
// address.
//
// The client is published on the helper only after target discovery succeeds.
// Otherwise a failed Targets call would leave promClient set, and every later
// call would return early with an empty exporter map instead of retrying.
func (i *InspectionHelper) initProm() error {
	if !i.isInit {
		return errors.New("InspectionHelper is not init.")
	}

	// A previous call already finished successfully.
	if i.promClient != nil {
		return nil
	}

	promAddr := config.GetGlobalConfig().PrometheusAddr
	if promAddr == "" {
		return errors.New("Invalid Prometheus Address")
	}

	client, err := api.NewClient(api.Config{
		Address: fmt.Sprintf("http://%s", promAddr),
	})
	if err != nil {
		return errors.Trace(err)
	}

	// get node exporter info.
	ctx, cancel := context.WithTimeout(context.Background(), promReadTimeout)
	defer cancel()

	targets, err := v1.NewAPI(client).Targets(ctx)
	if err != nil {
		return errors.Trace(err)
	}

	// Writing to a nil map panics; tolerate helpers that were not built
	// through NewInspectionHelper.
	if i.nodeExporters == nil {
		i.nodeExporters = make(map[string]string)
	}
	for _, target := range targets.Active {
		if target.Labels["group"] != "node_exporter" {
			continue
		}
		if neAddr := string(target.Labels["instance"]); neAddr != "" {
			i.nodeExporters[getIPfromAdress(neAddr)] = neAddr
		}
	}

	i.promClient = client
	return nil
}

func (i *InspectionHelper) getSystemInfo(item ClusterItem) error {
api := v1.NewAPI(i.promClient)
ctx, cancel := context.WithTimeout(context.Background(), promReadTimeout)
defer cancel()

neAddr, ok := i.nodeExporters[item.IP]
if !ok {
return errors.New("Can not find node exporter address")
}

// get cpu count
cpuCountQuery := fmt.Sprintf(`count(node_cpu_seconds_total{instance="%s", mode="user"})`, neAddr)
result, err := api.Query(ctx, cpuCountQuery, time.Now())
if err != nil {
return errors.Trace(err)
}
cpuCount := result.(pmodel.Vector)[0].Value

// get cpu usage.
cpuUsageQuery := fmt.Sprintf(`1 - (sum(rate(node_cpu_seconds_total{instance="%s", mode="idle"}[1m])) / count(node_cpu_seconds_total{instance="%s", mode="idle"}) or
sum(irate(node_cpu_seconds_total{instance="%s", mode="idle"}[30s])) / count(node_cpu_seconds_total{instance="%s", mode="idle"}))`,
neAddr, neAddr, neAddr, neAddr)
result, err = api.Query(ctx, cpuUsageQuery, time.Now())
if err != nil {
return errors.Trace(err)
}
cpuUsage := fmt.Sprintf("%.2f%%", 100*result.(pmodel.Vector)[0].Value)

// get total memory.
memoryQuery := fmt.Sprintf(`node_memory_MemTotal_bytes{instance="%s"}`, neAddr)
result, err = api.Query(ctx, memoryQuery, time.Now())
if err != nil {
return errors.Trace(err)
}
memory := fmt.Sprintf("%.2fGiB", result.(pmodel.Vector)[0].Value/1024/1024/1024)

// get memory usage.
memoryUsageQuery := fmt.Sprintf(`1 - (node_memory_MemAvailable_bytes{instance="%s"} or
(node_memory_MemFree_bytes{instance="%s"} + node_memory_Buffers_bytes{instance="%s"} + node_memory_Cached_bytes{instance="%s"})) / node_memory_MemTotal_bytes{instance="%s"}`,
neAddr, neAddr, neAddr, neAddr, neAddr)
result, err = api.Query(ctx, memoryUsageQuery, time.Now())
if err != nil {
return errors.Trace(err)
}
memoryUsage := fmt.Sprintf("%.2f%%", 100*result.(pmodel.Vector)[0].Value)

// get load1/load5/load15
load1Query := fmt.Sprintf(`node_load1{instance="%s"}`, neAddr)
result, err = api.Query(ctx, load1Query, time.Now())
if err != nil {
return errors.Trace(err)
}
load1 := fmt.Sprintf("%.2f", result.(pmodel.Vector)[0].Value)

load5Query := fmt.Sprintf(`node_load5{instance="%s"}`, neAddr)
result, err = api.Query(ctx, load5Query, time.Now())
if err != nil {
return errors.Trace(err)
}
load5 := fmt.Sprintf("%.2f", result.(pmodel.Vector)[0].Value)

load15Query := fmt.Sprintf(`node_load15{instance="%s"}`, neAddr)
result, err = api.Query(ctx, load15Query, time.Now())
if err != nil {
return errors.Trace(err)
}
load15 := fmt.Sprintf("%.2f", result.(pmodel.Vector)[0].Value)

// get kernel version.
kernelQuery := fmt.Sprintf(`node_uname_info{instance="%s"}`, neAddr)
result, err = api.Query(ctx, kernelQuery, time.Now())
if err != nil {
return errors.Trace(err)
}
metric := result.(pmodel.Vector)[0].Metric
os := metric["sysname"]
machine := metric["machine"]
kernelVersion := metric["release"]
kernel := fmt.Sprintf("%s-%s-%s", os, machine, kernelVersion)

sql := fmt.Sprintf(`insert into %s.SYSTEM_INFO values (%d, "%s", "%s", "%s", "%s",
"%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s");`,
i.dbName, item.ID, item.Type, item.Name, item.IP, item.Address,
cpuCount, cpuUsage, memory, memoryUsage, load1, load5, load15, kernel)

_, _, err = i.ctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(sql)
if err != nil {
return errors.Trace(err)
}

return nil
}

func (i *InspectionHelper) GetSystemInfo() error {
if !i.isInit {
return errors.New("InspectionHelper is not init.")
err := i.initProm()
if err != nil {
return errors.Trace(err)
}

for _, item := range i.items {
err = i.getSystemInfo(item)
if err != nil {
return errors.Trace(err)
}
}

return nil
Expand Down

0 comments on commit adaea5c

Please sign in to comment.