Merge branch 'branch-3.4' of https://github.com/apache/spark into branch-3.4_snappy_1_1_10_3
panbingkun committed Jul 27, 2023
2 parents 0d5099d + 135bb49 commit 1958b73
Showing 3 changed files with 21 additions and 14 deletions.
29 changes: 17 additions & 12 deletions .github/workflows/build_and_test.yml
@@ -192,6 +192,7 @@ jobs:
HIVE_PROFILE: ${{ matrix.hive }}
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -328,6 +329,8 @@ jobs:
java:
- ${{ inputs.java }}
modules:
- >-
pyspark-errors
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
- >-
@@ -337,7 +340,7 @@
- >-
pyspark-pandas-slow
- >-
pyspark-connect, pyspark-errors
pyspark-connect
env:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: ${{ inputs.hadoop }}
@@ -346,6 +349,7 @@
SPARK_LOCAL_IP: localhost
SKIP_UNIDOC: true
SKIP_MIMA: true
SKIP_PACKAGING: true
METASPACE_SIZE: 1g
steps:
- name: Checkout Spark repository
@@ -394,14 +398,20 @@ jobs:
python3.9 -m pip list
pypy3 -m pip list
- name: Install Conda for pip packaging test
if: ${{ matrix.modules == 'pyspark-errors' }}
run: |
curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
shell: 'script -q -e -c "bash {0}"'
run: |
export PATH=$PATH:$HOME/miniconda/bin
if [[ "$MODULES_TO_TEST" == "pyspark-errors" ]]; then
export PATH=$PATH:$HOME/miniconda/bin
export SKIP_PACKAGING=false
echo "Python Packaging Tests Enabled!"
fi
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
- name: Upload coverage to Codecov
if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
@@ -437,6 +447,7 @@ jobs:
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_MIMA: true
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -578,15 +589,6 @@ jobs:
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Python linter
run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
- name: Install dependencies for Python code generation check
run: |
# See more in "Installation" https://docs.buf.build/installation#tarball
curl -LO https://github.com/bufbuild/buf/releases/download/v1.15.1/buf-Linux-x86_64.tar.gz
mkdir -p $HOME/buf
tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
python3.9 -m pip install 'protobuf==3.19.5' 'mypy-protobuf==3.3.0'
- name: Python code generation check
run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
- name: Install JavaScript linter dependencies
run: |
apt update
@@ -850,6 +852,7 @@ jobs:
SPARK_LOCAL_IP: localhost
ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0
SKIP_MIMA: true
SKIP_PACKAGING: true
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
@@ -948,7 +951,9 @@ jobs:
- name: start minikube
run: |
# See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
# curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
# TODO(SPARK-44495): Resume to use the latest minikube for k8s-integration-tests.
curl -LO https://storage.googleapis.com/minikube/releases/v1.30.1/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
# Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
minikube start --cpus 2 --memory 6144
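The workflow changes above add SKIP_PACKAGING: true to the affected jobs' env, give pyspark-errors its own matrix entry, and make only that entry install Miniconda and flip the flag back to false in its run step. A minimal sketch of that mapping in Python (the helper name is illustrative, not part of the workflow):

def effective_skip_packaging(modules_to_test: str) -> str:
    # Mirrors the bash conditional in the "Run tests" step: only the dedicated
    # pyspark-errors matrix entry re-enables the pip packaging tests.
    return "false" if modules_to_test == "pyspark-errors" else "true"

assert effective_skip_packaging("pyspark-errors") == "false"  # packaging tests run here
assert effective_skip_packaging("pyspark-connect") == "true"  # skipped
assert effective_skip_packaging("pyspark-sql, pyspark-mllib, pyspark-resource") == "true"  # skipped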
2 changes: 1 addition & 1 deletion dev/run-tests.py
@@ -396,7 +396,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False):


def run_python_packaging_tests():
if not os.environ.get("SPARK_JENKINS"):
if not os.environ.get("SPARK_JENKINS") and os.environ.get("SKIP_PACKAGING", "false") != "true":
set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS")
command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")]
run_cmd(command)
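For reference, a standalone sketch of the gate added to run_python_packaging_tests() above, showing which environment combinations actually trigger the pip packaging tests (the helper and its dict argument are illustrative, not part of dev/run-tests.py):

def packaging_tests_enabled(env: dict) -> bool:
    # Same condition as dev/run-tests.py after this change: skip on Jenkins,
    # and skip whenever SKIP_PACKAGING is "true" (it defaults to "false").
    return not env.get("SPARK_JENKINS") and env.get("SKIP_PACKAGING", "false") != "true"

assert packaging_tests_enabled({}) is True                            # local default: run
assert packaging_tests_enabled({"SKIP_PACKAGING": "true"}) is False   # most CI jobs: skip
assert packaging_tests_enabled({"SKIP_PACKAGING": "false"}) is True   # the pyspark-errors job: run
assert packaging_tests_enabled({"SPARK_JENKINS": "1"}) is False       # Jenkins: still skipped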
4 changes: 3 additions & 1 deletion python/pyspark/sql/connect/types.py
@@ -155,14 +155,16 @@ def pyspark_types_to_proto_types(data_type: DataType) -> pb2.DataType:
ret.day_time_interval.start_field = data_type.startField
ret.day_time_interval.end_field = data_type.endField
elif isinstance(data_type, StructType):
struct = pb2.DataType.Struct()
for field in data_type.fields:
struct_field = pb2.DataType.StructField()
struct_field.name = field.name
struct_field.data_type.CopyFrom(pyspark_types_to_proto_types(field.dataType))
struct_field.nullable = field.nullable
if field.metadata is not None and len(field.metadata) > 0:
struct_field.metadata = json.dumps(field.metadata)
ret.struct.fields.append(struct_field)
struct.fields.append(struct_field)
ret.struct.CopyFrom(struct)
elif isinstance(data_type, MapType):
ret.map.key_type.CopyFrom(pyspark_types_to_proto_types(data_type.keyType))
ret.map.value_type.CopyFrom(pyspark_types_to_proto_types(data_type.valueType))
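The types.py change matters because of protobuf oneof semantics: merely reading ret.struct (as the old loop-only code did) does not mark struct as the active kind, so a StructType with zero fields serialized with no kind set at all. Building the Struct separately and always calling CopyFrom sets the kind even for an empty struct. A minimal sketch, assuming a PySpark 3.4+ install with the Spark Connect protos available and that the DataType oneof is named "kind" as in types.proto:

import pyspark.sql.connect.proto as pb2

broken = pb2.DataType()
_ = broken.struct.fields          # read-only access does not set the oneof
print(broken.WhichOneof("kind"))  # None -> an empty StructType lost its type

fixed = pb2.DataType()
struct = pb2.DataType.Struct()    # build the (possibly empty) struct first
fixed.struct.CopyFrom(struct)     # CopyFrom always marks "struct" as the kind
print(fixed.WhichOneof("kind"))   # 'struct', even with zero fields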
