diff --git a/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_on_rows_hashvalue_partitionby_yyyy_mm.py b/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_on_rows_hashvalue_partitionby_yyyy_mm.py index 4a6900a178e..d41122e038a 100644 --- a/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_on_rows_hashvalue_partitionby_yyyy_mm.py +++ b/terraform/environments/electronic-monitoring-data/glue-job/dms_dv_on_rows_hashvalue_partitionby_yyyy_mm.py @@ -239,7 +239,11 @@ def write_parquet_to_s3(df_dv_output: DataFrame, database, db_sch_tbl_name): DATE_PARTITION_COLUMN_NAME, TABLE_PKEY_COLUMN, args.get("rds_only_where_clause", None)) - + + for e in hashed_rows_agg_schema: + rds_table_row_stats_df_agg = rds_table_row_stats_df_agg.withColumn( + e.name, F.col(f"{e.name}").cast(e.dataType)) + if S3Methods.check_s3_folder_path_if_exists(RDS_HASHED_ROWS_PRQ_BUCKET, f"{rds_hashed_rows_bucket_parent_dir}/rds_table_row_stats_df_agg"): @@ -257,9 +261,6 @@ def write_parquet_to_s3(df_dv_output: DataFrame, database, db_sch_tbl_name): ], group_by_cols_list ) - for e in hashed_rows_agg_schema: - df_prq_rds_table_agg_row_stats = df_prq_rds_table_agg_row_stats.withColumn( - e.name, F.col(f"{e.name}").cast(e.dataType)) prq_rds_table_row_stats_df_agg_updated.write\ .mode("overwrite")\