From 99565d803d2f36d9bee4b169737a0b0f2279111b Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 6 Jun 2024 15:07:36 -0700
Subject: [PATCH] skip snowflake summary plugin in docs

---
 metadata-ingestion/scripts/docgen.py | 89 ++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 24 deletions(-)

diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py
index d240f8e16c7700..797a2f698c2f40 100644
--- a/metadata-ingestion/scripts/docgen.py
+++ b/metadata-ingestion/scripts/docgen.py
@@ -583,6 +583,12 @@ def generate(
         if source and source != plugin_name:
             continue
 
+        if plugin_name in {
+            "snowflake-summary",
+        }:
+            logger.info(f"Skipping {plugin_name} as it is on the deny list")
+            continue
+
         metrics["plugins"]["discovered"] = metrics["plugins"]["discovered"] + 1  # type: ignore
         # We want to attempt to load all plugins before printing a summary.
         source_type = None
@@ -885,11 +891,14 @@ def generate(
     os.makedirs(source_dir, exist_ok=True)
     doc_file = f"{source_dir}/lineage-feature-guide.md"
     with open(doc_file, "w+") as f:
-        f.write("import FeatureAvailability from '@site/src/components/FeatureAvailability';\n\n")
+        f.write(
+            "import FeatureAvailability from '@site/src/components/FeatureAvailability';\n\n"
+        )
         f.write(f"# About DataHub Lineage\n\n")
         f.write("<FeatureAvailability/>\n")
-        f.write("""
+        f.write(
+            """
 Data lineage is a **map that shows how data flows through your organization.** It details where your data originates, how it travels, and where it ultimately ends up.
 This can happen within a single system (like data moving between Snowflake tables) or across various platforms.
 
@@ -979,24 +988,27 @@ def generate(
 
 ### Automatic Lineage Extraction Support
 
-This is a summary of automatic lineage extraciton support in our data source. Please refer to the **Important Capabilities** table in the source documentation. Note that even if the source does not support automatic extraction, you can still add lineage manually using our API & SDKs.\n""")
+This is a summary of automatic lineage extraction support in our data sources. Please refer to the **Important Capabilities** table in the source documentation. Note that even if the source does not support automatic extraction, you can still add lineage manually using our API & SDKs.\n"""
+        )
 
-        f.write("\n| Source | Table-Level Lineage | Column-Level Lineage | Related Configs |\n")
+        f.write(
+            "\n| Source | Table-Level Lineage | Column-Level Lineage | Related Configs |\n"
+        )
         f.write("| ---------- | ------ | ----- |----- |\n")
 
         for platform_id, platform_docs in sorted(
-                source_documentation.items(),
-                key=lambda x: (x[1]["name"].casefold(), x[1]["name"])
-                if "name" in x[1]
-                else (x[0].casefold(), x[0]),
+            source_documentation.items(),
+            key=lambda x: (x[1]["name"].casefold(), x[1]["name"])
+            if "name" in x[1]
+            else (x[0].casefold(), x[0]),
         ):
             for plugin, plugin_docs in sorted(
-                    platform_docs["plugins"].items(),
-                    key=lambda x: str(x[1].get("doc_order"))
-                    if x[1].get("doc_order")
-                    else x[0],
+                platform_docs["plugins"].items(),
+                key=lambda x: str(x[1].get("doc_order"))
+                if x[1].get("doc_order")
+                else x[0],
             ):
-                platform_name = platform_docs['name']
+                platform_name = platform_docs["name"]
                 if len(platform_docs["plugins"].keys()) > 1:
                     # We only need to show this if there are multiple modules.
platform_name = f"{platform_name} `{plugin}`" @@ -1004,33 +1016,60 @@ def generate( # Initialize variables table_level_supported = "❌" column_level_supported = "❌" - config_names = '' + config_names = "" if "capabilities" in plugin_docs: plugin_capabilities = plugin_docs["capabilities"] for cap_setting in plugin_capabilities: capability_text = get_capability_text(cap_setting.capability) - capability_supported = get_capability_supported_badge(cap_setting.supported) + capability_supported = get_capability_supported_badge( + cap_setting.supported + ) - if capability_text == "Table-Level Lineage" and capability_supported == "✅": + if ( + capability_text == "Table-Level Lineage" + and capability_supported == "✅" + ): table_level_supported = "✅" - if capability_text == "Column-level Lineage" and capability_supported == "✅": + if ( + capability_text == "Column-level Lineage" + and capability_supported == "✅" + ): column_level_supported = "✅" if not (table_level_supported == "❌" and column_level_supported == "❌"): if "config_schema" in plugin_docs: - config_properties = json.loads(plugin_docs['config_schema']).get('properties', {}) - config_names = '
'.join( - [f'- {property_name}' for property_name in config_properties if 'lineage' in property_name]) - lineage_not_applicable_sources = ['azure-ad', 'csv', 'demo-data', 'dynamodb', 'iceberg', 'json-schema', 'ldap', 'openapi', 'pulsar', 'sqlalchemy' ] - if platform_id not in lineage_not_applicable_sources : + config_properties = json.loads( + plugin_docs["config_schema"] + ).get("properties", {}) + config_names = "
".join( + [ + f"- {property_name}" + for property_name in config_properties + if "lineage" in property_name + ] + ) + lineage_not_applicable_sources = [ + "azure-ad", + "csv", + "demo-data", + "dynamodb", + "iceberg", + "json-schema", + "ldap", + "openapi", + "pulsar", + "sqlalchemy", + ] + if platform_id not in lineage_not_applicable_sources: f.write( f"| [{platform_name}](../../generated/ingestion/sources/{platform_id}.md) | {table_level_supported} | {column_level_supported} | {config_names}|\n" ) - f.write(""" + f.write( + """ ### SQL Parser Lineage Extraction @@ -1054,10 +1093,12 @@ def generate( - [Data in Context: Lineage Explorer in DataHub](https://blog.datahubproject.io/data-in-context-lineage-explorer-in-datahub-a53a9a476dc4) - [Harnessing the Power of Data Lineage with DataHub](https://blog.datahubproject.io/harnessing-the-power-of-data-lineage-with-datahub-ad086358dec4) - [Data Lineage: What It Is And Why It Matters](https://blog.datahubproject.io/data-lineage-what-it-is-and-why-it-matters-1a8d9846f0bd) - """) + """ + ) print("Lineage Documentation Generation Complete") + if __name__ == "__main__": logger.setLevel("INFO") generate()