Merge branch 'branch-3.4' of https://github.com/apache/spark into branch-3.4_snappy_1_1_10_3
panbingkun committed Jul 27, 2023
2 parents 0d5099d + 135bb49 commit 1958b73
Showing 3 changed files with 21 additions and 14 deletions.
29 changes: 17 additions & 12 deletions .github/workflows/build_and_test.yml
@@ -192,6 +192,7 @@ jobs:
HIVE_PROFILE: ${{ matrix.hive }}
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -328,6 +329,8 @@ jobs:
java:
- ${{ inputs.java }}
modules:
- >-
pyspark-errors
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
- >-
@@ -337,7 +340,7 @@
- >-
pyspark-pandas-slow
- >-
pyspark-connect, pyspark-errors
pyspark-connect
env:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: ${{ inputs.hadoop }}
@@ -346,6 +349,7 @@
SPARK_LOCAL_IP: localhost
SKIP_UNIDOC: true
SKIP_MIMA: true
SKIP_PACKAGING: true
METASPACE_SIZE: 1g
steps:
- name: Checkout Spark repository
@@ -394,14 +398,20 @@ jobs:
python3.9 -m pip list
pypy3 -m pip list
- name: Install Conda for pip packaging test
if: ${{ matrix.modules == 'pyspark-errors' }}
run: |
curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
shell: 'script -q -e -c "bash {0}"'
run: |
export PATH=$PATH:$HOME/miniconda/bin
if [[ "$MODULES_TO_TEST" == "pyspark-errors" ]]; then
export PATH=$PATH:$HOME/miniconda/bin
export SKIP_PACKAGING=false
echo "Python Packaging Tests Enabled!"
fi
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
- name: Upload coverage to Codecov
if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
@@ -437,6 +447,7 @@ jobs:
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_MIMA: true
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -578,15 +589,6 @@ jobs:
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Python linter
run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
- name: Install dependencies for Python code generation check
run: |
# See more in "Installation" https://docs.buf.build/installation#tarball
curl -LO https://github.com/bufbuild/buf/releases/download/v1.15.1/buf-Linux-x86_64.tar.gz
mkdir -p $HOME/buf
tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
python3.9 -m pip install 'protobuf==3.19.5' 'mypy-protobuf==3.3.0'
- name: Python code generation check
run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
- name: Install JavaScript linter dependencies
run: |
apt update
@@ -850,6 +852,7 @@ jobs:
SPARK_LOCAL_IP: localhost
ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0
SKIP_MIMA: true
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -948,7 +951,9 @@ jobs:
- name: start minikube
run: |
# See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
# curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
# TODO(SPARK-44495): Resume to use the latest minikube for k8s-integration-tests.
curl -LO https://storage.googleapis.com/minikube/releases/v1.30.1/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
# Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
minikube start --cpus 2 --memory 6144
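The workflow changes above add SKIP_PACKAGING: true to the affected jobs' env, give pyspark-errors its own matrix entry, and make only that entry install Miniconda and flip the flag back to false in its run step. A minimal sketch of that mapping in Python (the helper name is illustrative, not part of the workflow):

def effective_skip_packaging(modules_to_test: str) -> str:
    # Mirrors the bash conditional in the "Run tests" step: only the dedicated
    # pyspark-errors matrix entry re-enables the pip packaging tests.
    return "false" if modules_to_test == "pyspark-errors" else "true"

assert effective_skip_packaging("pyspark-errors") == "false"  # packaging tests run here
assert effective_skip_packaging("pyspark-connect") == "true"  # skipped
assert effective_skip_packaging("pyspark-sql, pyspark-mllib, pyspark-resource") == "true"  # skipped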
2 changes: 1 addition & 1 deletion dev/run-tests.py
@@ -396,7 +396,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False):


def run_python_packaging_tests():
if not os.environ.get("SPARK_JENKINS"):
if not os.environ.get("SPARK_JENKINS") and os.environ.get("SKIP_PACKAGING", "false") != "true":
set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS")
command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")]
run_cmd(command)
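For reference, a standalone sketch of the gate added to run_python_packaging_tests() above, showing which environment combinations actually trigger the pip packaging tests (the helper and its dict argument are illustrative, not part of dev/run-tests.py):

def packaging_tests_enabled(env: dict) -> bool:
    # Same condition as dev/run-tests.py after this change: skip on Jenkins,
    # and skip whenever SKIP_PACKAGING is "true" (it defaults to "false").
    return not env.get("SPARK_JENKINS") and env.get("SKIP_PACKAGING", "false") != "true"

assert packaging_tests_enabled({}) is True                            # local default: run
assert packaging_tests_enabled({"SKIP_PACKAGING": "true"}) is False   # most CI jobs: skip
assert packaging_tests_enabled({"SKIP_PACKAGING": "false"}) is True   # the pyspark-errors job: run
assert packaging_tests_enabled({"SPARK_JENKINS": "1"}) is False       # Jenkins: still skipped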
4 changes: 3 additions & 1 deletion python/pyspark/sql/connect/types.py
@@ -155,14 +155,16 @@ def pyspark_types_to_proto_types(data_type: DataType) -> pb2.DataType:
ret.day_time_interval.start_field = data_type.startField
ret.day_time_interval.end_field = data_type.endField
elif isinstance(data_type, StructType):
struct = pb2.DataType.Struct()
for field in data_type.fields:
struct_field = pb2.DataType.StructField()
struct_field.name = field.name
struct_field.data_type.CopyFrom(pyspark_types_to_proto_types(field.dataType))
struct_field.nullable = field.nullable
if field.metadata is not None and len(field.metadata) > 0:
struct_field.metadata = json.dumps(field.metadata)
ret.struct.fields.append(struct_field)
struct.fields.append(struct_field)
ret.struct.CopyFrom(struct)
elif isinstance(data_type, MapType):
ret.map.key_type.CopyFrom(pyspark_types_to_proto_types(data_type.keyType))
ret.map.value_type.CopyFrom(pyspark_types_to_proto_types(data_type.valueType))
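The types.py change matters because of protobuf oneof semantics: merely reading ret.struct (as the old loop-only code did) does not mark struct as the active kind, so a StructType with zero fields serialized with no kind set at all. Building the Struct separately and always calling CopyFrom sets the kind even for an empty struct. A minimal sketch, assuming a PySpark 3.4+ install with the Spark Connect protos available and that the DataType oneof is named "kind" as in types.proto:

import pyspark.sql.connect.proto as pb2

broken = pb2.DataType()
_ = broken.struct.fields          # read-only access does not set the oneof
print(broken.WhichOneof("kind"))  # None -> an empty StructType lost its type

fixed = pb2.DataType()
struct = pb2.DataType.Struct()    # build the (possibly empty) struct first
fixed.struct.CopyFrom(struct)     # CopyFrom always marks "struct" as the kind
print(fixed.WhichOneof("kind"))   # 'struct', even with zero fields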
