From ba5b3f05bb699c9be59b39661cb8186ce7175587 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:53:38 -0400
Subject: [PATCH 1/5] skip broken tests, link to the issue for resolution (#1056)

---
 tests/functional/adapter/test_python_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 1195cbd3e..957361cb7 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,7 +15,9 @@ class TestPythonModelSpark(BasePythonModelTests):

 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    pass
+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
+    def test_different_dataframes(self, project):
+        return super().test_different_dataframes(project)


 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -69,6 +71,7 @@ class TestChangingSchemaSpark:
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}

+    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

From cd6efba4f006f7bc3de761a02717ff9261b736a1 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Fri, 21 Jun 2024 16:50:48 -0500
Subject: [PATCH 2/5] update user docs-issue workflow (#1051)

* update user docs-issue workflow

* pre-commit fix

* update workflow based onf feedback

* whitespace

* update to match bigquery

* pin numpy to below 2.0 new release

* remove numpy pin for its own pr

---
 .github/workflows/docs-issues.yml | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/docs-issues.yml b/.github/workflows/docs-issues.yml
index 00a098df8..f49cf517c 100644
--- a/.github/workflows/docs-issues.yml
+++ b/.github/workflows/docs-issues.yml
@@ -1,19 +1,18 @@
 # **what?**
-# Open an issue in docs.getdbt.com when a PR is labeled `user docs`
+# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed

 # **why?**
 # To reduce barriers for keeping docs up to date

 # **when?**
-# When a PR is labeled `user docs` and is merged. Runs on pull_request_target to run off the workflow already merged,
-# not the workflow that existed on the PR branch. This allows old PRs to get comments.
+# When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed.

-name: Open issues in docs.getdbt.com repo when a PR is labeled
-run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}"
+name: Open issues in docs.getdbt.com repo when an issue is labeled
+run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}"

 on:
-  pull_request_target:
+  issues:
     types: [labeled, closed]

 defaults:
   run:
@@ -21,23 +20,22 @@ defaults:
     shell: bash

 permissions:
-  issues: write # opens new issues
-  pull-requests: write # comments on PRs
-
+  issues: write # comments on issues

 jobs:
   open_issues:
-    # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`. Otherwise it runs the
+    # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned.
+    # If this logic does not exist in this workflow, it runs the
     # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having
     # generated the comment before the other runs. This lives here instead of the shared workflow because this is where we
     # decide if it should run or not.
     if: |
-      (github.event.pull_request.merged == true) &&
-      ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) ||
+      (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && (
+      (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) ||
       (github.event.action == 'labeled' && github.event.label.name == 'user docs'))
     uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
     with:
       issue_repository: "dbt-labs/docs.getdbt.com"
-      issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}"
+      issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
       issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
     secrets: inherit

From 50634b9c6038016f888ea84be542d9a84e52a141 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 24 Jun 2024 09:09:25 -0700
Subject: [PATCH 3/5] update spark internal-release workflow (#1052)

Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
---
 .github/workflows/release-internal.yml | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml
index eb892415c..d4e7a3c93 100644
--- a/.github/workflows/release-internal.yml
+++ b/.github/workflows/release-internal.yml
@@ -10,15 +10,12 @@
 #
 # Manual trigger.

-name: "Release internal patch"
+name: "Release to Cloud"
+run-name: "Release to Cloud off of ${{ inputs.ref }}"

 on:
   workflow_dispatch:
     inputs:
-      version_number:
-        description: "The release version number (i.e. 1.0.0b1)"
-        type: string
-        required: true
       ref:
         description: "The ref (sha or branch name) to use"
         type: string
@@ -29,6 +26,11 @@ on:
         type: string
         default: "python -c \"import dbt.adapters.spark\""
         required: true
+      skip_tests:
+        description: "Should the tests be skipped? (default to false)"
+        type: boolean
+        required: true
+        default: false

 defaults:
   run:
@@ -129,15 +131,14 @@ jobs:
         run: python dagger/run_dbt_spark_tests.py --profile ${{ matrix.test }}

   invoke-reusable-workflow:
-    name: "Build and Release Internally"
+    name: "Create cloud release"
     needs: [run-integration-tests]
-
     uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main"

     with:
-      version_number: "${{ inputs.version_number }}"
       package_test_command: "${{ inputs.package_test_command }}"
       dbms_name: "spark"
       ref: "${{ inputs.ref }}"
+      skip_tests: "${{ inputs.skip_tests }}"

     secrets: "inherit"

From 824ca0f2249d145234f21d7e4066e033a273e2e2 Mon Sep 17 00:00:00 2001
From: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
Date: Wed, 26 Jun 2024 12:06:32 -0400
Subject: [PATCH 4/5] Update the spark version to the current version (#1055)

* update the spark version to the current version

* update pin for pydantic to resolve https://github.com/explosion/spaCy/issues/12659

* exclude koalas dataframes from test
---
 tests/functional/adapter/test_python_model.py | 35 ++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 957361cb7..cd798d1da 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -15,9 +15,22 @@ class TestPythonModelSpark(BasePythonModelTests):

 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_different_dataframes(self, project):
-        return super().test_different_dataframes(project)
+        """
+        Test that python models are supported using dataframes from:
+        - pandas
+        - pyspark
+        - pyspark.pandas (formerly databricks.koalas)
+
+        Note:
+            The CI environment is on Apache Spark >3.1, which includes koalas as pyspark.pandas.
+            The only Databricks runtime that supports Apache Spark <=3.1 is 9.1 LTS, which is EOL 2024-09-23.
+            For more information, see:
+            - https://github.com/databricks/koalas
+            - https://docs.databricks.com/en/release-notes/runtime/index.html
+        """
+        results = run_dbt(["run", "--exclude", "koalas_df"])
+        assert len(results) == 3


 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
@@ -37,7 +50,7 @@ def model(dbt, spark):
     materialized='table',
     submission_method='job_cluster',
     job_cluster_config={
-        "spark_version": "7.3.x-scala2.12",
+        "spark_version": "12.2.x-scala2.12",
         "node_type_id": "i3.xlarge",
         "num_workers": 0,
         "spark_conf": {
@@ -48,7 +61,7 @@ def model(dbt, spark):
             "ResourceClass": "SingleNode"
         }
     },
-    packages=['spacy', 'torch', 'pydantic<1.10.3']
+    packages=['spacy', 'torch', 'pydantic>=1.10.8']
 )
 data = [[1,2]] * 10
 return spark.createDataFrame(data, schema=['test', 'test2'])
@@ -67,11 +80,23 @@ def model(dbt, spark):

 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestChangingSchemaSpark:
+    """
+    Confirm that we can set up a spot instance and parse required packages into the Databricks job.
+
+    Notes:
+        - This test generates a spot instance on demand using the settings from `job_cluster_config`
+          in `models__simple_python_model` above. It takes several minutes to run due to creating the cluster.
+          The job can be monitored via "Data Engineering > Job Runs" or "Workflows > Job Runs"
+          in the Databricks UI (instead of via the normal cluster).
+        - The `spark_version` argument will need to periodically be updated. It will eventually become
+          unsupported and start experiencing issues.
+        - See https://github.com/explosion/spaCy/issues/12659 for why we're pinning pydantic
+    """
+
     @pytest.fixture(scope="class")
     def models(self):
         return {"simple_python_model.py": models__simple_python_model}

-    @pytest.mark.skip("https://github.com/dbt-labs/dbt-spark/issues/1054")
     def test_changing_schema_with_log_validation(self, project, logs_dir):
         run_dbt(["run"])
         write_file(

From 034cb6118e808c1c9ad81d3553a136ac94b77781 Mon Sep 17 00:00:00 2001
From: Mila Page <67295367+VersusFacit@users.noreply.github.com>
Date: Mon, 8 Jul 2024 12:11:09 -0400
Subject: [PATCH 5/5] Base 207/add test (#1057)

* Add test for upstream change.

* Skip session since it's not liking the test.

* Import pytest to fix skip error.

* Dial in tests to reflect error messages from spark.

---------

Co-authored-by: Mila Page
Co-authored-by: Mike Alfare
---
 .../adapter/dbt_show/test_dbt_show.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 tests/functional/adapter/dbt_show/test_dbt_show.py

diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py
new file mode 100644
index 000000000..bc56fd908
--- /dev/null
+++ b/tests/functional/adapter/dbt_show/test_dbt_show.py
@@ -0,0 +1,22 @@
+import pytest
+
+from dbt.tests.adapter.dbt_show.test_dbt_show import (
+    BaseShowSqlHeader,
+    BaseShowLimit,
+    BaseShowDoesNotHandleDoubleLimit,
+)
+
+
+class TestSparkShowLimit(BaseShowLimit):
+    pass
+
+
+class TestSparkShowSqlHeader(BaseShowSqlHeader):
+    pass
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_http_cluster")
+class TestSparkShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit):
+    """The syntax message is quite variable across clusters, but this hits two at once."""
+
+    DATABASE_ERROR_MESSAGE = "limit"
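
A note on the final patch: the override pins DATABASE_ERROR_MESSAGE to the bare fragment "limit" so that one expected string matches however a given cluster words its double-LIMIT syntax error. The sketch below is illustrative only and is not part of the patch; it assumes the shared base test merely asserts that this fragment appears in the database error text, and the sample messages are hypothetical rather than captured from Spark.

# Illustrative sketch only; not part of the patch above. It assumes the shared
# base test checks that DATABASE_ERROR_MESSAGE appears as a substring of the
# error raised for a doubled LIMIT, which is why the broad fragment "limit"
# can cover differently worded Spark errors.
DATABASE_ERROR_MESSAGE = "limit"

# Hypothetical examples of how different cluster types might word the error;
# the exact messages vary and are not quoted from Spark.
sample_errors = [
    "[PARSE_SYNTAX_ERROR] Syntax error at or near 'limit'",
    "mismatched input 'limit' expecting <EOF>",
]


def test_fragment_matches_all_variants():
    # A case-insensitive substring check matches every sample wording at once.
    for message in sample_errors:
        assert DATABASE_ERROR_MESSAGE in message.lower()


if __name__ == "__main__":
    test_fragment_matches_all_variants()
    print("every sample error message contains 'limit'")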