From 3cdf01aa129f6bbfcca70b81a9c4995c834fadcd Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Thu, 17 Oct 2024 09:58:25 -0700
Subject: [PATCH] [SPARK-50015][BUILD] Upgrade `grpcio*` to 1.67.0 and `grpc-java` to 1.67.1

---
 .github/workflows/build_and_test.yml         |  4 ++--
 .github/workflows/maven_test.yml             |  2 +-
 dev/create-release/spark-rm/Dockerfile       |  4 ++--
 dev/infra/Dockerfile                         |  2 +-
 dev/requirements.txt                         |  4 ++--
 pom.xml                                      |  2 +-
 project/SparkBuild.scala                     |  2 +-
 .../docs/source/getting_started/install.rst  |  4 ++--
 python/packaging/classic/setup.py            |  2 +-
 .../sql/connect/proto/base_pb2_grpc.py       | 21 +++++++++++++++++++
 sql/connect/common/src/main/buf.gen.yaml     |  4 ++--
 11 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 553a961109ab0..8143a9857a670 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -276,7 +276,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
       run: |
-        python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==5.28.3'
+        python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
         python3.11 -m pip list
     # Run the tests.
     - name: Run tests
@@ -725,7 +725,7 @@ jobs:
         python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
           ipython ipython_genutils sphinx_plotly_directive numpy pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
           'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
-          'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+          'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
           'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
         python3.9 -m pip list
     - name: Python linter
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index 22153fe2f980c..6965fb4968af3 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -178,7 +178,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
       run: |
-        python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==5.28.3'
+        python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
         python3.11 -m pip list
     # Run the tests.
     - name: Run tests
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index f70a1dec6e468..fd7c3dbaa61d6 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -102,7 +102,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.2' scipy coverage matp
 ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==5.28.3 googleapis-common-protos==1.65.0"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.28.3 googleapis-common-protos==1.65.0"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
@@ -131,7 +131,7 @@ RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CON
 RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
 ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
-'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
 RUN python3.9 -m pip list
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 70efeecfac581..28f39fd8976f0 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -96,7 +96,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matp
 ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
-ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==5.28.3 googleapis-common-protos==1.65.0 graphviz==0.20.3"
+ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.28.3 googleapis-common-protos==1.65.0 graphviz==0.20.3"

 # Install Python 3.10 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 88456e876d271..9f8d040659000 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -58,8 +58,8 @@ black==23.9.1
 py

 # Spark Connect (required)
-grpcio>=1.62.0
-grpcio-status>=1.62.0
+grpcio>=1.67.0
+grpcio-status>=1.67.0
 googleapis-common-protos>=1.65.0

 # Spark Connect python proto generation plugin (optional)
diff --git a/pom.xml b/pom.xml
index 086948aac7fa3..bd8881e3ae7d0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -294,7 +294,7 @@
     33.2.1-jre
     1.0.2
-    1.62.2
+    1.67.1
     1.1.4
     6.0.53
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index b061ce96bc0fe..cbd0c11958dfc 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -91,7 +91,7 @@ object BuildCommons {
   // SPARK-41247: needs to be consistent with `protobuf.version` in `pom.xml`.
   val protoVersion = "4.28.3"
   // GRPC version used for Spark Connect.
-  val grpcVersion = "1.62.2"
+  val grpcVersion = "1.67.1"
 }

 object SparkBuild extends PomBuild {
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 4d777e0840dc7..d0dc285b5257c 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -208,8 +208,8 @@ Package                    Supported version Note
 ========================== ================= ==========================
 `pandas`                   >=2.0.0           Required for Spark Connect
 `pyarrow`                  >=10.0.0          Required for Spark Connect
-`grpcio`                   >=1.62.0          Required for Spark Connect
-`grpcio-status`            >=1.62.0          Required for Spark Connect
+`grpcio`                   >=1.67.0          Required for Spark Connect
+`grpcio-status`            >=1.67.0          Required for Spark Connect
 `googleapis-common-protos` >=1.65.0          Required for Spark Connect
 `graphviz`                 >=0.20            Optional for Spark Connect
 ========================== ================= ==========================
diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py
index 60da51caa20ae..d799af1216345 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -153,7 +153,7 @@ def _supports_symlinks():
 _minimum_pandas_version = "2.0.0"
 _minimum_numpy_version = "1.21"
 _minimum_pyarrow_version = "10.0.0"
-_minimum_grpc_version = "1.62.0"
+_minimum_grpc_version = "1.67.0"
 _minimum_googleapis_common_protos_version = "1.65.0"
diff --git a/python/pyspark/sql/connect/proto/base_pb2_grpc.py b/python/pyspark/sql/connect/proto/base_pb2_grpc.py
index 12675747e0f92..7501aaf0a3a23 100644
--- a/python/pyspark/sql/connect/proto/base_pb2_grpc.py
+++ b/python/pyspark/sql/connect/proto/base_pb2_grpc.py
@@ -34,51 +34,61 @@ def __init__(self, channel):
             "/spark.connect.SparkConnectService/ExecutePlan",
             request_serializer=spark_dot_connect_dot_base__pb2.ExecutePlanRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ExecutePlanResponse.FromString,
+            _registered_method=True,
         )
         self.AnalyzePlan = channel.unary_unary(
             "/spark.connect.SparkConnectService/AnalyzePlan",
             request_serializer=spark_dot_connect_dot_base__pb2.AnalyzePlanRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.AnalyzePlanResponse.FromString,
+            _registered_method=True,
         )
         self.Config = channel.unary_unary(
             "/spark.connect.SparkConnectService/Config",
             request_serializer=spark_dot_connect_dot_base__pb2.ConfigRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ConfigResponse.FromString,
+            _registered_method=True,
         )
         self.AddArtifacts = channel.stream_unary(
             "/spark.connect.SparkConnectService/AddArtifacts",
             request_serializer=spark_dot_connect_dot_base__pb2.AddArtifactsRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.AddArtifactsResponse.FromString,
+            _registered_method=True,
         )
         self.ArtifactStatus = channel.unary_unary(
             "/spark.connect.SparkConnectService/ArtifactStatus",
             request_serializer=spark_dot_connect_dot_base__pb2.ArtifactStatusesRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ArtifactStatusesResponse.FromString,
+            _registered_method=True,
         )
         self.Interrupt = channel.unary_unary(
             "/spark.connect.SparkConnectService/Interrupt",
             request_serializer=spark_dot_connect_dot_base__pb2.InterruptRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.InterruptResponse.FromString,
+            _registered_method=True,
         )
         self.ReattachExecute = channel.unary_stream(
             "/spark.connect.SparkConnectService/ReattachExecute",
             request_serializer=spark_dot_connect_dot_base__pb2.ReattachExecuteRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ExecutePlanResponse.FromString,
+            _registered_method=True,
         )
         self.ReleaseExecute = channel.unary_unary(
             "/spark.connect.SparkConnectService/ReleaseExecute",
             request_serializer=spark_dot_connect_dot_base__pb2.ReleaseExecuteRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ReleaseExecuteResponse.FromString,
+            _registered_method=True,
         )
         self.ReleaseSession = channel.unary_unary(
             "/spark.connect.SparkConnectService/ReleaseSession",
             request_serializer=spark_dot_connect_dot_base__pb2.ReleaseSessionRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.ReleaseSessionResponse.FromString,
+            _registered_method=True,
         )
         self.FetchErrorDetails = channel.unary_unary(
             "/spark.connect.SparkConnectService/FetchErrorDetails",
             request_serializer=spark_dot_connect_dot_base__pb2.FetchErrorDetailsRequest.SerializeToString,
             response_deserializer=spark_dot_connect_dot_base__pb2.FetchErrorDetailsResponse.FromString,
+            _registered_method=True,
         )
@@ -220,6 +230,7 @@ def add_SparkConnectServiceServicer_to_server(servicer, server):
         "spark.connect.SparkConnectService", rpc_method_handlers
     )
     server.add_generic_rpc_handlers((generic_handler,))
+    server.add_registered_method_handlers("spark.connect.SparkConnectService", rpc_method_handlers)


 # This class is part of an EXPERIMENTAL API.
@@ -253,6 +264,7 @@ def ExecutePlan(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -282,6+294,7 @@ def AnalyzePlan(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -311,6 +324,7 @@ def Config(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -340,6 +354,7 @@ def AddArtifacts(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -369,6 +384,7 @@ def ArtifactStatus(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -398,6 +414,7 @@ def Interrupt(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -427,6 +444,7 @@ def ReattachExecute(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -456,6 +474,7 @@ def ReleaseExecute(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -485,6 +504,7 @@ def ReleaseSession(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )

     @staticmethod
@@ -514,4 +534,5 @@ def FetchErrorDetails(
             wait_for_ready,
             timeout,
             metadata,
+            _registered_method=True,
         )
diff --git a/sql/connect/common/src/main/buf.gen.yaml b/sql/connect/common/src/main/buf.gen.yaml
index 9b0b07932eae8..a68bc880b8315 100644
--- a/sql/connect/common/src/main/buf.gen.yaml
+++ b/sql/connect/common/src/main/buf.gen.yaml
@@ -22,14 +22,14 @@ plugins:
     out: gen/proto/csharp
   - plugin: buf.build/protocolbuffers/java:v21.7
     out: gen/proto/java
-  - plugin: buf.build/grpc/ruby:v1.62.0
+  - plugin: buf.build/grpc/ruby:v1.67.0
     out: gen/proto/ruby
   - plugin: buf.build/protocolbuffers/ruby:v21.7
     out: gen/proto/ruby
   # Building the Python build and building the mypy interfaces.
   - plugin: buf.build/protocolbuffers/python:v21.7
     out: gen/proto/python
-  - plugin: buf.build/grpc/python:v1.62.0
+  - plugin: buf.build/grpc/python:v1.67.0
     out: gen/proto/python
   - name: mypy
     out: gen/proto/python
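
Note (not part of the patch): with this change, the Spark Connect Python client's minimum grpcio/grpcio-status becomes 1.67.0 (`_minimum_grpc_version` in `python/packaging/classic/setup.py`). Below is a minimal, hypothetical sketch of checking that constraint in an environment before using Spark Connect; the helper name and the use of `importlib.metadata` are assumptions for illustration, not code from this patch.

# Hypothetical helper, not part of the patch: mirrors the `_minimum_grpc_version`
# bump to 1.67.0 in setup.py. Assumes plain "X.Y.Z" release version strings.
from importlib.metadata import PackageNotFoundError, version

_MINIMUM_GRPC_VERSION = "1.67.0"  # value introduced by this patch in setup.py


def check_grpc_minimum(package: str, minimum: str = _MINIMUM_GRPC_VERSION) -> None:
    """Raise ImportError if `package` is missing or older than `minimum`."""
    try:
        installed = version(package)
    except PackageNotFoundError:
        raise ImportError(f"{package}>={minimum} is required for Spark Connect") from None

    def as_tuple(v: str) -> tuple:
        # Compare the first three numeric components, e.g. "1.67.0" -> (1, 67, 0).
        return tuple(int(part) for part in v.split(".")[:3])

    if as_tuple(installed) < as_tuple(minimum):
        raise ImportError(
            f"{package}>={minimum} is required for Spark Connect; found {installed}"
        )


if __name__ == "__main__":
    # Both packages are pinned to the same minimum by the upgrade.
    check_grpc_minimum("grpcio")
    check_grpc_minimum("grpcio-status")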