diff --git a/tools/IoTEdgeDevOps/TestDashboard/.gitignore b/tools/IoTEdgeDevOps/TestDashboard/.gitignore new file mode 100644 index 00000000000..4c49bd78f1d --- /dev/null +++ b/tools/IoTEdgeDevOps/TestDashboard/.gitignore @@ -0,0 +1 @@ +.env diff --git a/tools/IoTEdgeDevOps/TestDashboard/config/dashboard.yaml b/tools/IoTEdgeDevOps/TestDashboard/config/dashboard.yaml index 16e770b8188..418ea7c8268 100644 --- a/tools/IoTEdgeDevOps/TestDashboard/config/dashboard.yaml +++ b/tools/IoTEdgeDevOps/TestDashboard/config/dashboard.yaml @@ -1,11 +1,25 @@ apiVersion: 1 providers: -- name: 'Azure Monitor' + # a unique provider name +- name: 'Provider' + # org id. will default to orgId 1 if not specified orgId: 1 + # name of the dashboard folder. Required folder: '' + # folder UID. will be automatically generated if not specified + folderUid: '' + # provider type. Required type: file + # disable dashboard deletion disableDeletion: false + # enable dashboard editing editable: true + # how often Grafana will scan for changed dashboards + updateIntervalSeconds: 10 + # allow updating provisioned dashboards from the UI + allowUiUpdates: true options: + # path to dashboard files on disk.
Required path: /etc/grafana/provisioning/dashboards + diff --git a/tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585250973210.json b/tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585707775101.json similarity index 95% rename from tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585250973210.json rename to tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585707775101.json index 333d29cdcdd..eaf467bf296 100644 --- a/tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585250973210.json +++ b/tools/IoTEdgeDevOps/TestDashboard/dashboards/Home-1585707775101.json @@ -15,8 +15,7 @@ "editable": true, "gnetId": null, "graphTooltip": 0, - "id": 6, - "iteration": 1585248647613, + "iteration": 1585703889150, "links": [], "panels": [ { @@ -1677,7 +1676,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "Total In-Progress Bugs | Priority Other", + "title": "Total Bugs | Priority Other", "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ @@ -1928,6 +1927,10 @@ { "text": "✓", "value": "1" + }, + { + "text": "N/A", + "value": "NaN" } ] } @@ -1944,7 +1947,7 @@ "valueColumn": "" }, "azureLogAnalytics": { - "query": "let mostRecentTestBuildNumber = toscalar(connectivity_CL\n| where TestInfo_ImagesSourceBranch_s == \"$BranchName\"\n| summarize arg_max(TestInfo_TestBuildNumber_s, TimeGenerated)\n| project TestInfo_TestBuildNumber_s);\n\nconnectivity_CL\n| where TestInfo_TestBuildNumber_s == mostRecentTestBuildNumber and TestInfo_TestBuildNumber_s != \"\"\n| extend Scenario = TestInfo_NetworkDescription_s \n| mv-expand todynamic(TestResultReports_s)\n| extend TestName = TestResultReports_s.TestDescription\n| extend IsTestPassed = iff(TestResultReports_s.IsPassed == \"true\", 1, 0)\n| project TestName, Scenario, IsTestPassed\n| evaluate pivot(Scenario, avg(IsTestPassed))\n| where TestName != \"\"", + "query": "let mostRecentTestBuildNumber = toscalar(connectivity_CL\n| where TestInfo_ImagesSourceBranch_s == \"$BranchName\"\n| summarize 
arg_max(TestInfo_TestBuildNumber_s, TimeGenerated)\n| project TestInfo_TestBuildNumber_s);\n\nconnectivity_CL\n| where TestInfo_TestBuildNumber_s == mostRecentTestBuildNumber and TestInfo_TestBuildNumber_s != \"\"\n| extend Scenario = strcat(TestInfo_NetworkDescription_s, \" | \", TestInfo_HostPlatform_s)\n| mv-expand todynamic(TestResultReports_s)\n| extend TestName = TestResultReports_s.TestDescription\n| extend IsTestPassed = iff(TestResultReports_s.IsPassed == \"true\", 1, 0)\n| project TestName, Scenario, IsTestPassed\n| evaluate pivot(Scenario, avg(IsTestPassed))\n| where TestName != \"\"", "resultFormat": "table", "workspace": "fdf47b96-87f3-4b86-90b9-d83e2deae8a0" }, @@ -2032,7 +2035,7 @@ "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" + "#73BF69" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, @@ -2176,7 +2179,7 @@ "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" + "rgb(50, 162, 38)" ], "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, @@ -2430,7 +2433,7 @@ "valueColumn": "" }, "azureLogAnalytics": { - "query": "\nlet mostRecentTestBuildNumber = toscalar(sanitizedTestMetrics\n| where testType == 'LongHaul' and imagesBranchName == \"$BranchName\"\n| summarize arg_max(TimeGenerated, testBuildNumber)\n| project testBuildNumber);\nlet filtered = sanitizedTestMetrics\n| where testBuildNumber == mostRecentTestBuildNumber and testType == \"LongHaul\";\nlet filteredWithMinTime = filtered\n| summarize minTimeGenerated = min(TimeGenerated);\nlet filteredWithMaxTime = filtered\n| summarize maxTimeGenerated = max(TimeGenerated);\nlet minTimeGenerated = toscalar(filteredWithMinTime);\nlet maxTimeGenerated = toscalar(filteredWithMaxTime);\nlet environmentPrefix = toscalar(filtered\n| where device contains \"Linux\"\n| extend device = substring(device, 0, 3)\n| project device);\nlet alerts = Alert\n| where TimeGenerated > minTimeGenerated + 15m and TimeGenerated < 
maxTimeGenerated and AlertName contains environmentPrefix\n| extend device = extract(\".*longhaul\", 0, AlertName)\n| order by TimeGenerated;\nlet descriptions = datatable(description: string)\n[\"Reported Metrics\",\n \"Queue Length\",\n \"Upstream Message Rate\",\n \"Local Message Rate\",\n \"Reported Property Rate\",\n \"Queue Length\",\n \"Edge Agent CPU\",\n \"Edge Hub CPU\",\n \"Edge Agent Memory\",\n \"Edge Hub Memory\",\n \"Failed Module Starts\",\n \"Disk Space\"\n ];\nlet devices = filtered\n| summarize count() by device\n| extend device = tolower(device)\n| project device;\nlet deviceDescriptions = devices \n| extend dummy=1 | join kind = inner (descriptions | extend dummy=1) on dummy // dummy cross join from AzureDiagnostics\n| project device, description\n| extend violations = 0; \nlet numberOfMetrics = alerts\n| where AlertDescription contains \"number-of-metrics-too-low\" or AlertDescription contains \"number-of-metrics-too-high\"\n| summarize violations = count() by device\n| extend description = \"Reported Metrics\";\nlet upstreamMessageRate = alerts\n| where AlertDescription contains \"upstream-message-rate\" or AlertDescription contains \"no-upstream-messages\"\n| summarize violations = count() by device\n| extend description = \"Upstream Message Rate\";\nlet localMessageRate = alerts\n| where AlertDescription contains \"local-message-rate\" or AlertDescription contains \"no-local-messages\"\n| summarize violations = count() by device\n| extend description = \"Local Message Rate\";\nlet reportedPropertyRate = alerts\n| where AlertDescription contains \"reported-property-rate\" or AlertDescription contains \"no-reported-properties\"\n| summarize violations = count() by device\n| extend description = \"Reported Property Rate\";\nlet queueLength = alerts\n| where AlertDescription contains \"queue-length\"\n| summarize violations = count() by device\n| extend description = \"Queue Length\";\nlet edgeAgentCPU = alerts\n| where AlertDescription 
contains \"edge-agent-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent CPU\";\nlet edgeHubCPU = alerts\n| where AlertDescription contains \"edge-hub-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub CPU\";\nlet edgeAgentMemory = alerts\n| where AlertDescription contains \"edge-agent-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent Memory\";\nlet edgeHubMemory = alerts\n| where AlertDescription contains \"edge-hub-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub Memory\";\nlet moduleStarts = alerts\n| where AlertDescription contains \"failed-module-starts\"\n| summarize violations = count() by device\n| extend description = \"Failed Module Starts\";\nlet diskSpace = alerts\n| where AlertDescription contains \"disk-space\"\n| summarize violations = count() by device\n| extend description = \"Disk Space\";\nlet mergeViolations = (T1:(device: string, description: string, violations: long), T2:(device: string, description: string, violations: long)) {\nT1\n | join kind = leftouter (T2 | extend device = tolower(device)) on device, description\n | project-away device1, description1\n | extend violations = iff(isnull(violations1) == false, violations1, violations)\n | project-away violations1\n | extend violations = iff(device contains \"winpro\" and (description contains \"Memory\" or description contains \"Disk\"), -1, violations)\n};\nlet tmp0 = mergeViolations(deviceDescriptions, numberOfMetrics);\nlet tmp1 = mergeViolations(tmp0, upstreamMessageRate);\nlet tmp2 = mergeViolations(tmp1, localMessageRate);\nlet tmp3 = mergeViolations(tmp2, reportedPropertyRate);\nlet tmp4 = mergeViolations(tmp3, queueLength);\nlet tmp5 = mergeViolations(tmp4, edgeAgentCPU);\nlet tmp6 = mergeViolations(tmp5, edgeHubCPU);\nlet tmp7 = mergeViolations(tmp6, edgeAgentMemory);\nlet tmp8 = mergeViolations(tmp7, edgeHubMemory);\nlet tmp9 = 
mergeViolations(tmp8, moduleStarts);\nlet tmp10 = mergeViolations(tmp9, diskSpace);\ntmp10\n| evaluate pivot(description, avg(violations))\n| project-reorder * asc\n\n", + "query": "\nlet mostRecentTestBuildNumber = toscalar(sanitizedTestMetrics\n| where testType == 'LongHaul' and imagesBranchName == \"$BranchName\"\n| summarize arg_max(TimeGenerated, testBuildNumber)\n| project testBuildNumber);\nlet filtered = sanitizedTestMetrics\n| where testBuildNumber == mostRecentTestBuildNumber and testType == \"LongHaul\";\nlet filteredWithMinTime = filtered\n| summarize minTimeGenerated = min(TimeGenerated);\nlet filteredWithMaxTime = filtered\n| summarize maxTimeGenerated = max(TimeGenerated);\nlet minTimeGenerated = toscalar(filteredWithMinTime);\nlet maxTimeGenerated = toscalar(filteredWithMaxTime);\nlet environmentPrefix = toscalar(filtered\n| where device contains \"Linux\"\n| extend device = substring(device, 0, 3)\n| project device);\nlet alerts = Alert\n| where TimeGenerated > minTimeGenerated + 15m and TimeGenerated < maxTimeGenerated and AlertName contains environmentPrefix\n| extend device = extract(\".*longhaul\", 0, AlertName)\n| order by TimeGenerated;\nlet descriptions = datatable(description: string)\n[\"Reported Metrics\",\n \"Queue Length\",\n \"Upstream Message Rate\",\n \"Local Message Rate\",\n \"Reported Property Rate\",\n \"Queue Length\",\n \"Edge Agent CPU\",\n \"Edge Hub CPU\",\n \"Edge Agent Memory\",\n \"Edge Hub Memory\",\n \"Failed Module Starts\",\n \"Disk Space\"\n ];\n let getConsecutiveFailures = (T1:(AlertName: string, TimeGenerated: datetime)) {\n T1\n | order by AlertName, TimeGenerated asc\n | extend nextAlertName = next(AlertName, 1)\n | extend nextTime = next(TimeGenerated, 1)\n | extend next2Time = next(TimeGenerated, 2)\n | project-reorder AlertName, TimeGenerated\n | extend isError = iff(nextTime - TimeGenerated < 20m and next2Time - nextTime < 20m and AlertName == nextAlertName, 1, 0)\n | where isError == 1\n};\nlet devices = 
filtered\n| summarize count() by device\n| extend device = tolower(device)\n| project device;\nlet deviceDescriptions = devices \n| extend dummy=1 | join kind = inner (descriptions | extend dummy=1) on dummy // dummy cross join from AzureDiagnostics\n| project device, description\n| extend violations = 0; \nlet numberOfMetrics = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"number-of-metrics-too-low\" or AlertDescription contains \"number-of-metrics-too-high\"\n| summarize violations = count() by device\n| extend description = \"Reported Metrics\";\nlet upstreamMessageRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"upstream-message-rate\" or AlertDescription contains \"no-upstream-messages\"\n| summarize violations = count() by device\n| extend description = \"Upstream Message Rate\";\nlet localMessageRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"local-message-rate\" or AlertDescription contains \"no-local-messages\"\n| summarize violations = count() by device\n| extend description = \"Local Message Rate\";\nlet reportedPropertyRate = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"reported-property-rate\" or AlertDescription contains \"no-reported-properties\"\n| summarize violations = count() by device\n| extend description = \"Reported Property Rate\";\nlet queueLength = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"queue-length\"\n| summarize violations = count() by device\n| extend description = \"Queue Length\";\nlet edgeAgentCPU = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-agent-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent CPU\";\nlet edgeHubCPU = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-hub-cpu\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub CPU\";\nlet edgeAgentMemory = getConsecutiveFailures(alerts)\n| 
where AlertDescription contains \"edge-agent-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Agent Memory\";\nlet edgeHubMemory = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"edge-hub-memory\"\n| summarize violations = count() by device\n| extend description = \"Edge Hub Memory\";\nlet moduleStarts = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"failed-module-starts\"\n| summarize violations = count() by device\n| extend description = \"Failed Module Starts\";\nlet diskSpace = getConsecutiveFailures(alerts)\n| where AlertDescription contains \"disk-space\"\n| summarize violations = count() by device\n| extend description = \"Disk Space\";\nlet mergeViolations = (T1:(device: string, description: string, violations: long), T2:(device: string, description: string, violations: long)) {\nT1\n | join kind = leftouter (T2 | extend device = tolower(device)) on device, description\n | project-away device1, description1\n | extend violations = iff(isnull(violations1) == false, violations1, violations)\n | project-away violations1\n | extend violations = iff(device contains \"winpro\" and (description contains \"Memory\" or description contains \"Disk\" or description contains \"cpu\"), -1, violations)\n};\nlet tmp0 = mergeViolations(deviceDescriptions, numberOfMetrics);\nlet tmp1 = mergeViolations(tmp0, upstreamMessageRate);\nlet tmp2 = mergeViolations(tmp1, localMessageRate);\nlet tmp3 = mergeViolations(tmp2, reportedPropertyRate);\nlet tmp4 = mergeViolations(tmp3, queueLength);\nlet tmp5 = mergeViolations(tmp4, edgeAgentCPU);\nlet tmp6 = mergeViolations(tmp5, edgeHubCPU);\nlet tmp7 = mergeViolations(tmp6, edgeAgentMemory);\nlet tmp8 = mergeViolations(tmp7, edgeHubMemory);\nlet tmp9 = mergeViolations(tmp8, moduleStarts);\nlet tmp10 = mergeViolations(tmp9, diskSpace);\ntmp10\n| evaluate pivot(description, avg(violations))\n| project-reorder * asc\n", "resultFormat": "table", "workspace": 
"fdf47b96-87f3-4b86-90b9-d83e2deae8a0" }, @@ -2954,5 +2957,5 @@ "timezone": "", "title": "Home", "uid": "OLjJ46wWz", - "version": 130 + "version": 1 } \ No newline at end of file