Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CH] Migrate TorchInductor benchmark page #5769

Merged
merged 1 commit into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
{
    "branches": "Array(String)",
    "commits": "Array(String)",
    "compilers": "Array(String)",
    "device": "String",
    "dtypes": "String",
    "getJobId": "Bool",
    "granularity": "String",
    "mode": "String",
    "startTime": "DateTime64(3)",
    "stopTime": "DateTime64(3)",
    "suites": "Array(String)",
    "workflowId": "Int64"
}
324 changes: 181 additions & 143 deletions torchci/clickhouse_queries/compilers_benchmark_performance/query.sql
Original file line number Diff line number Diff line change
@@ -1,152 +1,190 @@
-- This query is used to get the PT2 benchmark results from different experiments
-- to power the TorchInductor benchmark dashboard
WITH performance_results AS (
    SELECT
        name,
        -- Handle the recent burst of infra errors: map 'infra_error' to an
        -- empty string so it is coalesced to 0.0 in the `results` CTE below
        IF(speedup = 'infra_error', '', speedup) AS speedup,
        -- Strip the '_<dtypes>_<mode>_<device>_performance' suffix; the
        -- remainder is '<compiler>_<suite>', parsed apart in `results`
        REPLACE(
            filename,
            CONCAT(
                '_',
                {dtypes: String },
                '_',
                {mode: String },
                '_',
                {device: String },
                '_performance'
            ),
            ''
        ) AS replaced_filename,
        compilation_latency,
        compression_ratio,
        abs_latency,
        dynamo_peak_mem,
        eager_peak_mem,
        workflow_id,
        toInt64(job_id) AS job_id,
        timestamp
    FROM
        benchmark.inductor_torch_dynamo_perf_stats
    WHERE
        filename LIKE CONCAT(
            '%_',
            {dtypes: String },
            '_',
            {mode: String },
            '_',
            {device: String },
            '_performance%'
        )
        -- timestamp column is in epoch milliseconds; compare against the
        -- DateTime64(3) parameters converted to the same unit
        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
        -- workflowId = 0 means "all workflows in the time range"
        AND (
            workflow_id = {workflowId: Int64 }
            OR {workflowId: Int64 } = 0
        )
),
accuracy_results AS (
    SELECT
        name,
        accuracy,
        -- Same suffix-stripping as performance_results, but for accuracy files
        REPLACE(
            filename,
            CONCAT(
                '_',
                {dtypes: String },
                '_',
                {mode: String },
                '_',
                {device: String },
                '_accuracy'
            ),
            ''
        ) AS replaced_filename,
        workflow_id,
        toInt64(job_id) AS job_id,
        timestamp
    FROM
        benchmark.inductor_torch_dynamo_perf_stats
    WHERE
        filename LIKE CONCAT(
            '%_',
            {dtypes: String },
            '_',
            {mode: String },
            '_',
            {device: String },
            '_accuracy%'
        )
        AND timestamp >= toUnixTimestamp64Milli({startTime: DateTime64(3) })
        AND timestamp < toUnixTimestamp64Milli({stopTime: DateTime64(3) })
        AND (
            workflow_id = {workflowId: Int64 }
            OR {workflowId: Int64 } = 0
        )
        -- Exclude records that failed before the compiler could run at all
        AND accuracy != 'model_fail_to_load'
        AND accuracy != 'eager_fail_to_run'
),
-- Join accuracy and performance rows for the same (name, filename, workflow)
-- and split replaced_filename ('<compiler>_<suite>') into its two parts
results AS (
    SELECT
        accuracy_results.workflow_id AS workflow_id,
        accuracy_results.job_id AS job_id,
        CASE
            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN 'torchbench'
            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN 'timm_models'
            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN 'huggingface'
            ELSE NULL
        END AS suite,
        CASE
            WHEN accuracy_results.replaced_filename LIKE '%_torchbench' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_torchbench',
                ''
            )
            WHEN accuracy_results.replaced_filename LIKE '%_timm_models' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_timm_models',
                ''
            )
            WHEN accuracy_results.replaced_filename LIKE '%_huggingface' THEN REPLACE(
                accuracy_results.replaced_filename,
                '_huggingface',
                ''
            )
            ELSE NULL
        END AS compiler,
        accuracy_results.name,
        -- Metrics are stored as strings; empty string (missing / infra error)
        -- becomes 0.0
        IF(speedup != '', toFloat32(speedup), 0.0) AS speedup,
        accuracy,
        IF(
            compilation_latency != '',
            toFloat32(compilation_latency),
            0.0
        ) AS compilation_latency,
        IF(
            compression_ratio != '',
            toFloat32(compression_ratio),
            0.0
        ) AS compression_ratio,
        IF(abs_latency != '', toFloat32(abs_latency), 0.0) AS abs_latency,
        IF(
            dynamo_peak_mem != '',
            toFloat32(dynamo_peak_mem),
            0.0
        ) AS dynamo_peak_mem,
        IF(eager_peak_mem != '', toFloat32(eager_peak_mem), 0.0) AS eager_peak_mem,
        -- Prefer the performance row's timestamp; fall back to the accuracy
        -- row's when the LEFT JOIN found no performance match (timestamp = 0)
        IF(
            performance_results.timestamp != 0,
            performance_results.timestamp,
            accuracy_results.timestamp
        ) AS timestamp
    FROM
        accuracy_results
        LEFT JOIN performance_results ON performance_results.name = accuracy_results.name
        AND performance_results.replaced_filename = accuracy_results.replaced_filename
        AND performance_results.workflow_id = accuracy_results.workflow_id
)
SELECT
    DISTINCT results.workflow_id,
    -- As the JSON response is pretty big, only return the job id if it's needed
    IF({getJobId: Bool}, results.job_id, 0) AS job_id,
    results.suite,
    results.compiler,
    results.name,
    results.speedup,
    results.accuracy,
    results.compilation_latency,
    results.compression_ratio,
    results.abs_latency,
    results.dynamo_peak_mem,
    results.eager_peak_mem,
    DATE_TRUNC(
        {granularity: String },
        fromUnixTimestamp64Milli(results.timestamp)
    ) AS granularity_bucket
FROM
    results
    -- FINAL so ReplacingMergeTree duplicates of workflow_run are collapsed
    LEFT JOIN default.workflow_run w FINAL ON results.workflow_id = w.id
WHERE
    has({suites: Array(String) }, lower(results.suite))
    -- An empty array parameter means "no filter" for each of the following
    AND (
        has(
            {compilers: Array(String) },
            lower(results.compiler)
        )
        OR empty({compilers: Array(String) })
    )
    AND (
        has({branches: Array(String) }, head_branch)
        OR empty({branches: Array(String) })
    )
    AND (
        has({commits: Array(String) }, head_sha)
        OR empty({commits: Array(String) })
    )
ORDER BY
    granularity_bucket DESC,
    workflow_id DESC,
    suite ASC,
    compiler ASC,
    name ASC
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{
"device": "String",
"dtypes": "String",
"granularity": "String",
"mode": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
}
Loading
Loading