Skip to content

Commit

Permalink
Test Dashboard: Edit config file to allow edits while provisioned (#2772
Browse files Browse the repository at this point in the history
)

* update dashboard

* add config file and gitignore
  • Loading branch information
and-rewsmith authored Apr 1, 2020
1 parent 780cd2f commit 2554c01
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
1 change: 1 addition & 0 deletions tools/IoTEdgeDevOps/TestDashboard/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.env
16 changes: 15 additions & 1 deletion tools/IoTEdgeDevOps/TestDashboard/config/dashboard.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
apiVersion: 1

providers:
- name: 'Azure Monitor'
# <string> a unique provider name
- name: 'Provider'
# <int> org id. Will default to orgId 1 if not specified
orgId: 1
# <string, required> name of the dashboard folder. Required
folder: ''
# <string> folder UID. Will be automatically generated if not specified
folderUid: ''
# <string, required> provider type. Required
type: file
# <bool> disable dashboard deletion
disableDeletion: false
# <bool> enable dashboard editing
editable: true
# <int> how often Grafana will scan for changed dashboards
updateIntervalSeconds: 10
# <bool> allow updating provisioned dashboards from the UI
allowUiUpdates: true
options:
# <string, required> path to dashboard files on disk. Required
path: /etc/grafana/provisioning/dashboards

Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 6,
"iteration": 1585248647613,
"iteration": 1585703889150,
"links": [],
"panels": [
{
Expand Down Expand Up @@ -1677,7 +1676,7 @@
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "Total In-Progress Bugs | Priority Other",
"title": "Total Bugs | Priority Other",
"type": "singlestat",
"valueFontSize": "50%",
"valueMaps": [
Expand Down Expand Up @@ -1928,6 +1927,10 @@
{
"text": "",
"value": "1"
},
{
"text": "N/A",
"value": "NaN"
}
]
}
Expand All @@ -1944,7 +1947,7 @@
"valueColumn": ""
},
"azureLogAnalytics": {
"query": "let mostRecentTestBuildNumber = toscalar(connectivity_CL\n| where TestInfo_ImagesSourceBranch_s == \"$BranchName\"\n| summarize arg_max(TestInfo_TestBuildNumber_s, TimeGenerated)\n| project TestInfo_TestBuildNumber_s);\n\nconnectivity_CL\n| where TestInfo_TestBuildNumber_s == mostRecentTestBuildNumber and TestInfo_TestBuildNumber_s != \"\"\n| extend Scenario = TestInfo_NetworkDescription_s \n| mv-expand todynamic(TestResultReports_s)\n| extend TestName = TestResultReports_s.TestDescription\n| extend IsTestPassed = iff(TestResultReports_s.IsPassed == \"true\", 1, 0)\n| project TestName, Scenario, IsTestPassed\n| evaluate pivot(Scenario, avg(IsTestPassed))\n| where TestName != \"\"",
"query": "let mostRecentTestBuildNumber = toscalar(connectivity_CL\n| where TestInfo_ImagesSourceBranch_s == \"$BranchName\"\n| summarize arg_max(TestInfo_TestBuildNumber_s, TimeGenerated)\n| project TestInfo_TestBuildNumber_s);\n\nconnectivity_CL\n| where TestInfo_TestBuildNumber_s == mostRecentTestBuildNumber and TestInfo_TestBuildNumber_s != \"\"\n| extend Scenario = strcat(TestInfo_NetworkDescription_s, \" | \", TestInfo_HostPlatform_s)\n| mv-expand todynamic(TestResultReports_s)\n| extend TestName = TestResultReports_s.TestDescription\n| extend IsTestPassed = iff(TestResultReports_s.IsPassed == \"true\", 1, 0)\n| project TestName, Scenario, IsTestPassed\n| evaluate pivot(Scenario, avg(IsTestPassed))\n| where TestName != \"\"",
"resultFormat": "table",
"workspace": "fdf47b96-87f3-4b86-90b9-d83e2deae8a0"
},
Expand Down Expand Up @@ -2032,7 +2035,7 @@
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
"#73BF69"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
Expand Down Expand Up @@ -2176,7 +2179,7 @@
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
"rgb(50, 162, 38)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
Expand Down Expand Up @@ -2430,7 +2433,7 @@
"valueColumn": ""
},
"azureLogAnalytics": {
"query": "\nlet mostRecentTestBuildNumber = toscalar(sanitizedTestMetrics\n| where testType == 'LongHaul' and imagesBranchName == \"$BranchName\"\n| summarize arg_max(TimeGenerated, testBuildNumber)\n| project testBuildNumber);\nlet filtered = sanitizedTestMetrics\n| where testBuildNumber == mostRecentTestBuildNumber and testType == \"LongHaul\";\nlet filteredWithMinTime = filtered\n| summarize minTimeGenerated = min(TimeGenerated);\nlet filteredWithMaxTime = filtered\n| summarize maxTimeGenerated = max(TimeGenerated);\nlet minTimeGenerated = toscalar(filteredWithMinTime);\nlet maxTimeGenerated = toscalar(filteredWithMaxTime);\nlet environmentPrefix = toscalar(filtered\n| where device contains \"Linux\"\n| extend device = substring(device, 0, 3)\n| project device);\nlet alerts = Alert\n| where TimeGenerated > minTimeGenerated + 15m and TimeGenerated < maxTimeGenerated and AlertName contains environmentPrefix\n| extend device = extract(\".*longhaul\", 0, AlertName)\n| order by TimeGenerated;\nlet descriptions = datatable(description: string)\n[\"Reported Metrics\",\n \"Queue Length\",\n \"Upstream Message Rate\",\n \"Local Message Rate\",\n \"Reported Property Rate\",\n \"Queue Length\",\n \"Edge Agent CPU\",\n \"Edge Hub CPU\",\n \"Edge Agent Memory\",\n \"Edge Hub Memory\",\n \"Failed Module Starts\",\n \"Disk Space\"\n ];\nlet devices = filtered\n| summarize count() by device\n| extend device = tolower(device)\n| project device;\nlet deviceDescriptions = devices \n| extend dummy=1 | join kind = inner (descriptions | extend dummy=1) on dummy // dummy cross join from AzureDiagnostics\n| project device, description\n| extend violations = 0; \nlet numberOfMetrics = alerts\n| where AlertDescription contains \"number-of-metrics-too-low\" or AlertDescription contains \"number-of-metrics-too-high\"\n| summarize violations = count() by device\n| extend description = \"Reported Metrics\";\nlet upstreamMessageRate = alerts\n| where AlertDescription contains 
\"upstream-message-rate\" or AlertDescription contains \"no-upstream-messages\"\n| summarize violations = count() by device\n| extend description = \"Upstream Message Rate\";\nlet localMessageRate = alerts\n| where AlertDescription contains \"local-message-rate\" or AlertDescription contains \"no-local-messages\"\n| summarize violations = count() by device\n| extend description = \"Local Message Rate\";\nlet reportedPropertyRate = alerts\n| where AlertDescription contains \"reported-property-rate\" or AlertDescription contains \"no-reported-properties\"\n| summarize violations = count() by device\n| extend description = \"Reported Property Rate\";\nlet queueLength = alerts\n| where AlertDescription contains \"queue-length\"\n| summarize violations = count() by device\n| extend description = \"Queue Length\";\nlet edgeAgentCPU = alerts\n| where AlertDescription contains \"edge-agent-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent CPU\";\nlet edgeHubCPU = alerts\n| where AlertDescription contains \"edge-hub-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub CPU\";\nlet edgeAgentMemory = alerts\n| where AlertDescription contains \"edge-agent-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent Memory\";\nlet edgeHubMemory = alerts\n| where AlertDescription contains \"edge-hub-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub Memory\";\nlet moduleStarts = alerts\n| where AlertDescription contains \"failed-module-starts\"\n| summarize violations = count() by device\n| extend description = \"Failed Module Starts\";\nlet diskSpace = alerts\n| where AlertDescription contains \"disk-space\"\n| summarize violations = count() by device\n| extend description = \"Disk Space\";\nlet mergeViolations = (T1:(device: string, description: string, violations: long), T2:(device: string, description: string, violations: long)) {\nT1\n | join 
kind = leftouter (T2 | extend device = tolower(device)) on device, description\n | project-away device1, description1\n | extend violations = iff(isnull(violations1) == false, violations1, violations)\n | project-away violations1\n | extend violations = iff(device contains \"winpro\" and (description contains \"Memory\" or description contains \"Disk\"), -1, violations)\n};\nlet tmp0 = mergeViolations(deviceDescriptions, numberOfMetrics);\nlet tmp1 = mergeViolations(tmp0, upstreamMessageRate);\nlet tmp2 = mergeViolations(tmp1, localMessageRate);\nlet tmp3 = mergeViolations(tmp2, reportedPropertyRate);\nlet tmp4 = mergeViolations(tmp3, queueLength);\nlet tmp5 = mergeViolations(tmp4, edgeAgentCPU);\nlet tmp6 = mergeViolations(tmp5, edgeHubCPU);\nlet tmp7 = mergeViolations(tmp6, edgeAgentMemory);\nlet tmp8 = mergeViolations(tmp7, edgeHubMemory);\nlet tmp9 = mergeViolations(tmp8, moduleStarts);\nlet tmp10 = mergeViolations(tmp9, diskSpace);\ntmp10\n| evaluate pivot(description, avg(violations))\n| project-reorder * asc\n\n",
"query": "\nlet mostRecentTestBuildNumber = toscalar(sanitizedTestMetrics\n| where testType == 'LongHaul' and imagesBranchName == \"$BranchName\"\n| summarize arg_max(TimeGenerated, testBuildNumber)\n| project testBuildNumber);\nlet filtered = sanitizedTestMetrics\n| where testBuildNumber == mostRecentTestBuildNumber and testType == \"LongHaul\";\nlet filteredWithMinTime = filtered\n| summarize minTimeGenerated = min(TimeGenerated);\nlet filteredWithMaxTime = filtered\n| summarize maxTimeGenerated = max(TimeGenerated);\nlet minTimeGenerated = toscalar(filteredWithMinTime);\nlet maxTimeGenerated = toscalar(filteredWithMaxTime);\nlet environmentPrefix = toscalar(filtered\n| where device contains \"Linux\"\n| extend device = substring(device, 0, 3)\n| project device);\nlet alerts = Alert\n| where TimeGenerated > minTimeGenerated + 15m and TimeGenerated < maxTimeGenerated and AlertName contains environmentPrefix\n| extend device = extract(\".*longhaul\", 0, AlertName)\n| order by TimeGenerated;\nlet descriptions = datatable(description: string)\n[\"Reported Metrics\",\n \"Queue Length\",\n \"Upstream Message Rate\",\n \"Local Message Rate\",\n \"Reported Property Rate\",\n \"Queue Length\",\n \"Edge Agent CPU\",\n \"Edge Hub CPU\",\n \"Edge Agent Memory\",\n \"Edge Hub Memory\",\n \"Failed Module Starts\",\n \"Disk Space\"\n ];\n let getConsecutiveFailures = (T1:(AlertName: string, TimeGenerated: datetime)) {\n T1\n | order by AlertName, TimeGenerated asc\n | extend nextAlertName = next(AlertName, 1)\n | extend nextTime = next(TimeGenerated, 1)\n | extend next2Time = next(TimeGenerated, 2)\n | project-reorder AlertName, TimeGenerated\n | extend isError = iff(nextTime - TimeGenerated < 20m and next2Time - nextTime < 20m and AlertName == nextAlertName, 1, 0)\n | where isError == 1\n};\nlet devices = filtered\n| summarize count() by device\n| extend device = tolower(device)\n| project device;\nlet deviceDescriptions = devices \n| extend dummy=1 | join kind = inner 
(descriptions | extend dummy=1) on dummy // dummy cross join from AzureDiagnostics\n| project device, description\n| extend violations = 0; \nlet numberOfMetrics = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"number-of-metrics-too-low\" or AlertDescription contains \"number-of-metrics-too-high\"\n| summarize violations = count() by device\n| extend description = \"Reported Metrics\";\nlet upstreamMessageRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"upstream-message-rate\" or AlertDescription contains \"no-upstream-messages\"\n| summarize violations = count() by device\n| extend description = \"Upstream Message Rate\";\nlet localMessageRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"local-message-rate\" or AlertDescription contains \"no-local-messages\"\n| summarize violations = count() by device\n| extend description = \"Local Message Rate\";\nlet reportedPropertyRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"reported-property-rate\" or AlertDescription contains \"no-reported-properties\"\n| summarize violations = count() by device\n| extend description = \"Reported Property Rate\";\nlet queueLength = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"queue-length\"\n| summarize violations = count() by device\n| extend description = \"Queue Length\";\nlet edgeAgentCPU = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-agent-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent CPU\";\nlet edgeHubCPU = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-hub-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub CPU\";\nlet edgeAgentMemory = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-agent-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent Memory\";\nlet edgeHubMemory = 
getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-hub-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub Memory\";\nlet moduleStarts = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"failed-module-starts\"\n| summarize violations = count() by device\n| extend description = \"Failed Module Starts\";\nlet diskSpace = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"disk-space\"\n| summarize violations = count() by device\n| extend description = \"Disk Space\";\nlet mergeViolations = (T1:(device: string, description: string, violations: long), T2:(device: string, description: string, violations: long)) {\nT1\n | join kind = leftouter (T2 | extend device = tolower(device)) on device, description\n | project-away device1, description1\n | extend violations = iff(isnull(violations1) == false, violations1, violations)\n | project-away violations1\n | extend violations = iff(device contains \"winpro\" and (description contains \"Memory\" or description contains \"Disk\" or description contains \"cpu\"), -1, violations)\n};\nlet tmp0 = mergeViolations(deviceDescriptions, numberOfMetrics);\nlet tmp1 = mergeViolations(tmp0, upstreamMessageRate);\nlet tmp2 = mergeViolations(tmp1, localMessageRate);\nlet tmp3 = mergeViolations(tmp2, reportedPropertyRate);\nlet tmp4 = mergeViolations(tmp3, queueLength);\nlet tmp5 = mergeViolations(tmp4, edgeAgentCPU);\nlet tmp6 = mergeViolations(tmp5, edgeHubCPU);\nlet tmp7 = mergeViolations(tmp6, edgeAgentMemory);\nlet tmp8 = mergeViolations(tmp7, edgeHubMemory);\nlet tmp9 = mergeViolations(tmp8, moduleStarts);\nlet tmp10 = mergeViolations(tmp9, diskSpace);\ntmp10\n| evaluate pivot(description, avg(violations))\n| project-reorder * asc\n",
"resultFormat": "table",
"workspace": "fdf47b96-87f3-4b86-90b9-d83e2deae8a0"
},
Expand Down Expand Up @@ -2954,5 +2957,5 @@
"timezone": "",
"title": "Home",
"uid": "OLjJ46wWz",
"version": 130
"version": 1
}

0 comments on commit 2554c01

Please sign in to comment.