From f2edda9d88123362dcd91c0d4cb766685ac9bee6 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 17 Dec 2024 14:37:05 -0500 Subject: [PATCH] fix(ingest/snowflake): improve lineage parse failure logging Follow up on https://github.com/datahub-project/datahub/pull/12125 --- .../ingestion/source/snowflake/snowflake_lineage_v2.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 93d84d8b246e51..c769c6705ac3f6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -414,9 +414,13 @@ def _process_upstream_lineage_row( except Exception as e: self.report.num_upstream_lineage_edge_parsing_failed += 1 upstream_tables = db_row.get("UPSTREAM_TABLES") + downstream_table = db_row.get("DOWNSTREAM_TABLE_NAME") self.structured_reporter.warning( "Failed to parse lineage edge", - context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}", + # Tricky: sometimes the full row data is too large, and so the context + # message gets truncated. By pulling out the upstream tables and the + # downstream table, we can at least get the important fields if truncation does occur. + context=f"Upstreams: {upstream_tables} Downstream: {downstream_table} Full row: {db_row}", exc=e, ) return None