Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CH] Migrate TorchInductor benchmark page #5769

Merged
merged 1 commit into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
{
    "branches": "Array(String)",
    "commits": "Array(String)",
    "compilers": "Array(String)",
    "device": "String",
    "dtypes": "String",
    "getJobId": "Bool",
    "granularity": "String",
    "mode": "String",
    "startTime": "DateTime64(3)",
    "stopTime": "DateTime64(3)",
    "suites": "Array(String)",
    "workflowId": "Int64"
}
324 changes: 181 additions & 143 deletions torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
Original file line number Diff line number Diff line change
@@ -1,152 +1,190 @@
-- This query is used to get the PT2 benchmark results from different experiments
-- to power the TorchInductor benchmark dashboard
WITH performance_results AS (
    SELECT
        name,
        -- Handle the recent burst of infra errors: map 'infra_error' to an
        -- empty string so it is coalesced to 0.0 in the `results` CTE below
        IF(speedup = 'infra_error', '', speedup) AS speedup,
        -- Strip the '_<dtypes>_<mode>_<device>_performance' suffix; the
        -- remainder is '<compiler>_<suite>', parsed apart in `results`
        REPLACE(
            filename,
            CONCAT(
                '_',
                {dtypes: String },
                '_',
                {mode: String },
                '_',
                {device: String },
                '_performance'
            ),
            ''
        ) AS replaced_filename,
        compilation_latency,
        compression_ratio,
        abs_latency,
        dynamo_peak_mem,
        eager_peak_mem,
        workflow_id,
        toInt64(job_id) AS job_id,
        timestamp
    FROM
        benchmark.inductor_torch_dynamo_perf_stats
    WHERE
        filename LIKE CONCAT(
            '%_',
            {dtypes: String },
            '_',
            {mode: String },
            '_',
            {device: String },
            '_performance%'
        )
        -- timestamp column is in epoch milliseconds; compare against the
        -- DateTime64(3) parameters converted to the same unit
        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
        -- workflowId = 0 means "all workflows in the time range"
        AND (
            workflow_id = {workflowId: Int64 }
            OR {workflowId: Int64 } = 0
        )
),
accuracy_results AS (
    SELECT
        name,
        accuracy,
        -- Same suffix-stripping as performance_results, but for accuracy files
        REPLACE(
            filename,
            CONCAT(
                '_',
                {dtypes: String },
                '_',
                {mode: String },
                '_',
                {device: String },
                '_accuracy'
            ),
            ''
        ) AS replaced_filename,
        workflow_id,
        toInt64(job_id) AS job_id,
        timestamp
    FROM
        benchmark.inductor_torch_dynamo_perf_stats
    WHERE
        filename LIKE CONCAT(
            '%_',
            {dtypes: String },
            '_',
            {mode: String },
            '_',
            {device: String },
            '_accuracy%'
        )
        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
        AND (
            workflow_id = {workflowId: Int64 }
            OR {workflowId: Int64 } = 0
        )
        -- Exclude records that failed before the compiler could run at all
        AND accuracy != 'model_fail_to_load'
        AND accuracy != 'eager_fail_to_run'
),
-- Join accuracy and performance rows for the same (name, filename, workflow)
-- and split replaced_filename ('<compiler>_<suite>') into its two parts
results AS (
    SELECT
        accuracy_results.workflow_id AS workflow_id,
        accuracy_results.job_id AS job_id,
        CASE
            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN 'torchbench'
            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN 'timm_models'
            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN 'huggingface'
            ELSE NULL
        END AS suite,
        CASE
            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_torchbench',
                ''
            )
            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_timm_models',
                ''
            )
            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_huggingface',
                ''
            )
            ELSE NULL
        END AS compiler,
        accuracy_results.name,
        -- Metrics are stored as strings; empty string (missing / infra error)
        -- becomes 0.0
        IF(speedup != '', toFloat32(speedup), 0.0) AS speedup,
        accuracy,
        IF(
            compilation_latency != '',
            toFloat32(compilation_latency),
            0.0
        ) AS compilation_latency,
        IF(
            compression_ratio != '',
            toFloat32(compression_ratio),
            0.0
        ) AS compression_ratio,
        IF(abs_latency != '', toFloat32(abs_latency), 0.0) AS abs_latency,
        IF(
            dynamo_peak_mem != '',
            toFloat32(dynamo_peak_mem),
            0.0
        ) AS dynamo_peak_mem,
        IF(eager_peak_mem != '', toFloat32(eager_peak_mem), 0.0) AS eager_peak_mem,
        -- Prefer the performance row's timestamp; fall back to the accuracy
        -- row's when the LEFT JOIN found no performance match (timestamp = 0)
        IF(
            performance_results.timestamp != 0,
            performance_results.timestamp,
            accuracy_results.timestamp
        ) AS timestamp
    FROM
        accuracy_results
        LEFT JOIN performance_results ON performance_results.name = accuracy_results.name
        AND performance_results.replaced_filename = accuracy_results.replaced_filename
        AND performance_results.workflow_id = accuracy_results.workflow_id
)
SELECT
    DISTINCT results.workflow_id,
    -- As the JSON response is pretty big, only return the job id if it's needed
    IF({getJobId: Bool}, results.job_id, 0) AS job_id,
    results.suite,
    results.compiler,
    results.name,
    results.speedup,
    results.accuracy,
    results.compilation_latency,
    results.compression_ratio,
    results.abs_latency,
    results.dynamo_peak_mem,
    results.eager_peak_mem,
    DATE_TRUNC(
        {granularity: String },
        fromUnixTimestamp64Milli(results.timestamp)
    ) AS granularity_bucket
FROM
    results
    -- FINAL so ReplacingMergeTree duplicates of workflow_run are collapsed
    LEFT JOIN default.workflow_run w FINAL ON results.workflow_id = w.id
WHERE
    has({suites: Array(String) }, lower(results.suite))
    -- An empty array parameter means "no filter" for each of the following
    AND (
        has(
            {compilers: Array(String) },
            lower(results.compiler)
        )
        OR empty({compilers: Array(String) })
    )
    AND (
        has({branches: Array(String) }, head_branch)
        OR empty({branches: Array(String) })
    )
    AND (
        has({commits: Array(String) }, head_sha)
        OR empty({commits: Array(String) })
    )
ORDER BY
    granularity_bucket DESC,
    workflow_id DESC,
    suite ASC,
    compiler ASC,
    name ASC
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{
"device": "String",
"dtypes": "String",
"granularity": "String",
"mode": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
}
Loading
Loading