From 57549fca82a169681ade72240ca349642fd7c918 Mon Sep 17 00:00:00 2001 From: matthewkrausse <106627640+mattkrausse@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:54:42 -0900 Subject: [PATCH] Add cautionary note about performance in get_row_count method --- parsons/google/google_bigquery.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py index c4002b7997..e08041071d 100644 --- a/parsons/google/google_bigquery.py +++ b/parsons/google/google_bigquery.py @@ -1121,6 +1121,10 @@ def get_row_count(self, schema: str, table_name: str) -> int: """ Gets the row count for a BigQuery materialization. + Caution: This method uses SELECT COUNT(*), which can be expensive for large tables, + especially those with many columns. This is because BigQuery may scan all table data + to perform the count, even though only the row count is returned. + `Args`: schema: str The schema name