Skip to content

Commit

Permalink
[COST-5137] Initial managed summary tasks rework (#5469)
Browse files Browse the repository at this point in the history
* Initial managed summary tasks rework
  • Loading branch information
lcouzens authored Feb 6, 2025
1 parent fa5bb8a commit 43da108
Show file tree
Hide file tree
Showing 38 changed files with 636 additions and 141 deletions.
15 changes: 14 additions & 1 deletion koku/masu/database/aws_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from masu.database import OCP_REPORT_TABLE_MAP
from masu.database.report_db_accessor_base import ReportDBAccessorBase
from masu.processor import is_feature_unattributed_storage_enabled_aws
from masu.processor import is_managed_ocp_cloud_summary_enabled
from masu.processor.parquet.summary_sql_metadata import SummarySqlMetadata
from reporting.models import OCP_ON_ALL_PERSPECTIVES
from reporting.models import OCP_ON_AWS_PERSPECTIVES
Expand Down Expand Up @@ -164,6 +165,13 @@ def populate_ocp_on_aws_ui_summary_tables_trino(
days = self.date_helper.list_days(start_date, end_date)
days_tup = tuple(str(day.day) for day in days)

# TODO Remove this when we switch to managed flow
trino_table = "reporting_ocpawscostlineitem_project_daily_summary"
column_name = "aws_source"
if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_AWS):
trino_table = "managed_reporting_ocpawscostlineitem_project_daily_summary"
column_name = "source"

for table_name in tables:
sql = pkgutil.get_data("masu.database", f"trino_sql/aws/openshift/{table_name}.sql")
sql = sql.decode("utf-8")
Expand All @@ -176,6 +184,8 @@ def populate_ocp_on_aws_ui_summary_tables_trino(
"days": days_tup,
"aws_source_uuid": aws_provider_uuid,
"ocp_source_uuid": openshift_provider_uuid,
"trino_table": trino_table,
"column_name": column_name,
}
self._execute_trino_raw_sql_query(sql, sql_params=sql_params, log_ref=f"{table_name}.sql")

Expand Down Expand Up @@ -251,7 +261,10 @@ def populate_ocp_on_aws_cost_daily_summary_trino(

unattributed_storage = is_feature_unattributed_storage_enabled_aws(self.schema)

sql = pkgutil.get_data("masu.database", "trino_sql/reporting_ocpawscostlineitem_daily_summary.sql")
sql_file = "trino_sql/reporting_ocpawscostlineitem_daily_summary.sql"
if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_AWS):
sql_file = "trino_sql/aws/openshift/managed_reporting_ocpawscostlineitem_daily_summary.sql"
sql = pkgutil.get_data("masu.database", sql_file)
sql = sql.decode("utf-8")
sql_params = {
"schema": self.schema,
Expand Down
16 changes: 14 additions & 2 deletions koku/masu/database/azure_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from masu.database import OCP_REPORT_TABLE_MAP
from masu.database.report_db_accessor_base import ReportDBAccessorBase
from masu.processor import is_feature_unattributed_storage_enabled_azure
from masu.processor import is_managed_ocp_cloud_summary_enabled
from masu.processor.parquet.summary_sql_metadata import SummarySqlMetadata
from reporting.models import OCP_ON_ALL_PERSPECTIVES
from reporting.models import OCP_ON_AZURE_PERSPECTIVES
Expand Down Expand Up @@ -208,6 +209,13 @@ def populate_ocp_on_azure_ui_summary_tables_trino(
days = self.date_helper.list_days(start_date, end_date)
days_tup = tuple(str(day.day) for day in days)

# TODO Remove this when we switch to managed flow
trino_table = "reporting_ocpazurecostlineitem_project_daily_summary"
column_name = "azure_source"
if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_AZURE):
trino_table = "managed_reporting_ocpazurecostlineitem_project_daily_summary"
column_name = "source"

for table_name in tables:
sql = pkgutil.get_data("masu.database", f"trino_sql/azure/openshift/{table_name}.sql")
sql = sql.decode("utf-8")
Expand All @@ -220,6 +228,8 @@ def populate_ocp_on_azure_ui_summary_tables_trino(
"days": days_tup,
"azure_source_uuid": azure_provider_uuid,
"ocp_source_uuid": openshift_provider_uuid,
"trino_table": trino_table,
"column_name": column_name,
}
self._execute_trino_raw_sql_query(sql, sql_params=sql_params, log_ref=f"{table_name}.sql")

Expand Down Expand Up @@ -287,8 +297,10 @@ def populate_ocp_on_azure_cost_daily_summary_trino(
if distribution == "memory":
pod_column = "pod_effective_usage_memory_gigabyte_hours"
node_column = "node_capacity_memory_gigabyte_hours"

sql = pkgutil.get_data("masu.database", "trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql")
sql_file = "trino_sql/reporting_ocpazurecostlineitem_daily_summary.sql"
if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_AZURE):
sql_file = "trino_sql/azure/openshift/managed_reporting_ocpazurecostlineitem_daily_summary.sql"
sql = pkgutil.get_data("masu.database", sql_file)
sql = sql.decode("utf-8")
sql_params = {
"uuid": str(openshift_provider_uuid).replace("-", "_"),
Expand Down
14 changes: 14 additions & 0 deletions koku/masu/database/gcp_report_db_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from masu.database import GCP_REPORT_TABLE_MAP
from masu.database import OCP_REPORT_TABLE_MAP
from masu.database.report_db_accessor_base import ReportDBAccessorBase
from masu.processor import is_managed_ocp_cloud_summary_enabled
from masu.processor.parquet.summary_sql_metadata import SummarySqlMetadata
from masu.util.gcp.common import check_resource_level
from masu.util.ocp.common import get_cluster_alias_from_cluster_id
Expand Down Expand Up @@ -348,6 +349,9 @@ def populate_ocp_on_gcp_cost_daily_summary_trino(
sql_level = "reporting_ocpgcpcostlineitem_daily_summary"
matching_type = "tag"

if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_GCP):
sql_level = "managed_reporting_ocpgcpcostlineitem_daily_summary"

sql = pkgutil.get_data("masu.database", f"trino_sql/gcp/openshift/{sql_level}.sql")
sql = sql.decode("utf-8")
sql_params = {
Expand Down Expand Up @@ -394,6 +398,14 @@ def populate_ocp_on_gcp_ui_summary_tables_trino(
days = self.date_helper.list_days(start_date, end_date)
days_tup = tuple(str(day.day) for day in days)
invoice_month_list = self.date_helper.gcp_find_invoice_months_in_date_range(start_date, end_date)

# TODO Remove this when we switch to managed flow
trino_table = "reporting_ocpgcpcostlineitem_project_daily_summary"
column_name = "gcp_source"
if is_managed_ocp_cloud_summary_enabled(self.schema, Provider.PROVIDER_GCP):
trino_table = "managed_reporting_ocpgcpcostlineitem_project_daily_summary"
column_name = "gcp_source"

for invoice_month in invoice_month_list:
for table_name in tables:
sql = pkgutil.get_data("masu.database", f"trino_sql/gcp/openshift/{table_name}.sql")
Expand All @@ -408,6 +420,8 @@ def populate_ocp_on_gcp_ui_summary_tables_trino(
"invoice_month": invoice_month,
"gcp_source_uuid": gcp_provider_uuid,
"ocp_source_uuid": openshift_provider_uuid,
"trino_table": trino_table,
"column_name": column_name,
}
self._execute_trino_raw_sql_query(sql, sql_params=sql_params, log_ref=f"{table_name}.sql")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
-- Insert managed table data into the postgres table.
--
-- Copies rows from the hive table
-- managed_reporting_ocpawscostlineitem_project_daily_summary into the postgres
-- table reporting_ocpawscostlineitem_project_daily_summary_p for a single
-- AWS source / OCP source pair, limited to the requested year, month and days.
--
-- Template parameters read by this file: schema, report_period_id, bill_id,
-- aws_source_uuid, ocp_source_uuid, year, month, days.
--
-- NOTE: the SELECT list below is matched to this column list by POSITION,
-- not by name. Any column added, removed or reordered here must be mirrored
-- at the same position in the SELECT list.

INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p (
uuid,
report_period_id,
cluster_id,
cluster_alias,
data_source,
namespace,
node,
persistentvolumeclaim,
persistentvolume,
storageclass,
resource_id,
usage_start,
usage_end,
product_code,
product_family,
instance_type,
cost_entry_bill_id,
usage_account_id,
account_alias_id,
availability_zone,
region,
unit,
usage_amount,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
data_transfer_direction,
currency_code,
unblended_cost,
markup_cost,
blended_cost,
markup_cost_blended,
savingsplan_effective_cost,
markup_cost_savingsplan,
calculated_amortized_cost,
markup_cost_amortized,
pod_cost,
project_markup_cost,
pod_labels,
tags,
aws_cost_category,
cost_category_id,
source_uuid
)
-- A new uuid is generated for every inserted row; it is not read from the managed table.
SELECT uuid(),
-- report_period_id and bill_id come from template parameters, so every row
-- written by one execution carries the same values for these two columns.
{{report_period_id | sqlsafe}} as report_period_id,
cluster_id,
cluster_alias,
data_source,
namespace,
node,
persistentvolumeclaim,
persistentvolume,
storageclass,
resource_id,
-- usage_start / usage_end are reduced to calendar dates before insertion.
date(usage_start),
date(usage_end),
product_code,
product_family,
instance_type,
{{bill_id | sqlsafe}} as cost_entry_bill_id,
usage_account_id,
account_alias_id,
availability_zone,
region,
unit,
usage_amount,
-- usage_amount is attributed to the "in" or "out" data column according to
-- data_transfer_direction (compared case-insensitively); the other column,
-- and both columns for any other or missing direction, receive 0.
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN usage_amount
ELSE 0
END AS infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN usage_amount
ELSE 0
END AS infrastructure_data_out_gigabytes,
data_transfer_direction,
currency_code,
unblended_cost,
markup_cost,
blended_cost,
markup_cost_blended,
savingsplan_effective_cost,
markup_cost_savingsplan,
calculated_amortized_cost,
markup_cost_amortized,
pod_cost,
project_markup_cost,
-- NOTE(review): presumably these three columns hold JSON text in the managed
-- table and JSON-typed values in postgres; confirm against both schemas.
json_parse(pod_labels),
json_parse(tags),
json_parse(aws_cost_category),
cost_category_id,
-- The managed table's source column is cast to UUID to fill source_uuid.
cast(source as UUID)
FROM hive.{{schema | sqlsafe}}.managed_reporting_ocpawscostlineitem_project_daily_summary
-- Restrict to the requested AWS source / OCP source pair and date range.
-- NOTE(review): year, month and day look like partition columns of the
-- managed table; confirm against the table definition.
WHERE source = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
AND day IN {{days | inclause}}
;
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_compute_summary_p (
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_cost_summary_by_accou
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_cost_summary_by_regio
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_cost_summary_by_servi
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_cost_summary_p (
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid,
max(cost_category_id)
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_database_summary_p (
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_network_summary_p (
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpaws_storage_summary_p (
sum(markup_cost_amortized),
max(currency_code),
cast({{aws_source_uuid}} as uuid) as source_uuid
FROM hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary
WHERE aws_source = {{aws_source_uuid}}
FROM hive.{{schema | sqlsafe}}.{{trino_table | sqlsafe}}
WHERE {{column_name | sqlsafe}} = {{aws_source_uuid}}
AND ocp_source = {{ocp_source_uuid}}
AND year = {{year}}
AND lpad(month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
Expand Down
Loading

0 comments on commit 43da108

Please sign in to comment.