diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index 57775dde9dd67..4e07bd79f8ac3 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -22,7 +22,9 @@
 import re
 import sys
 
-from releaseutils import *
+from releaseutils import tag_exists, raw_input, get_commits, yesOrNoPrompt, get_date, \
+    is_valid_author, capitalize_author, JIRA, find_components, translate_issue_type, \
+    translate_component, CORE_COMPONENT, contributors_file_name, nice_join
 
 # You must set the following before use!
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
index 554acc8b8a379..8340266527fc6 100755
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@@ -31,7 +31,15 @@
 import os
 import sys
 
-from releaseutils import *
+from releaseutils import JIRA, JIRAError, get_jira_name, Github, get_github_name, \
+    contributors_file_name, is_valid_author, raw_input, capitalize_author, yesOrNoPrompt
+
+try:
+    import unidecode
+except ImportError:
+    print("This tool requires the unidecode library to decode obscure github usernames")
+    print("Install using 'sudo pip install unidecode'")
+    sys.exit(-1)
 
 # You must set the following before use!
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
@@ -135,7 +143,7 @@ def generate_candidates(author, issues):
     # Note that the candidate name may already be in unicode (JIRA returns this)
     for i, (candidate, source) in enumerate(candidates):
         try:
-            candidate = unicode(candidate, "UTF-8")
+            candidate = unicode(candidate, "UTF-8")  # noqa: F821
         except TypeError:
             # already in unicode
             pass
diff --git a/dev/tox.ini b/dev/tox.ini
index e8e44803bd28c..c14e6b9446cca 100644
--- a/dev/tox.ini
+++ b/dev/tox.ini
@@ -19,6 +19,6 @@ max-line-length=100
 exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
 
 [flake8]
-select = E901,E999,F821,F822,F823,F401
+select = E901,E999,F821,F822,F823,F401,F405
 exclude = python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
 max-line-length = 100
diff --git a/examples/src/main/python/sql/basic.py b/examples/src/main/python/sql/basic.py
index eba8e6ad99d17..2667f9acab521 100644
--- a/examples/src/main/python/sql/basic.py
+++ b/examples/src/main/python/sql/basic.py
@@ -30,7 +30,7 @@
 
 # $example on:programmatic_schema$
 # Import data types
-from pyspark.sql.types import *
+from pyspark.sql.types import StringType, StructType, StructField
 # $example off:programmatic_schema$
 
 
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index f84c01e505064..fb05819e74124 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -53,12 +53,12 @@
 from pyspark.context import SparkContext
 from pyspark.rdd import RDD, RDDBarrier
 from pyspark.files import SparkFiles
+from pyspark.status import StatusTracker, SparkJobInfo, SparkStageInfo
 from pyspark.util import InheritableThread
 from pyspark.storagelevel import StorageLevel
 from pyspark.accumulators import Accumulator, AccumulatorParam
 from pyspark.broadcast import Broadcast
 from pyspark.serializers import MarshalSerializer, PickleSerializer
-from pyspark.status import *
 from pyspark.taskcontext import TaskContext, BarrierTaskContext, BarrierTaskInfo
 from pyspark.profiler import Profiler, BasicProfiler
 from pyspark.version import __version__  # noqa: F401
diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py
index a24b1ca5122db..fabfc3253e6f0 100644
--- a/python/pyspark/ml/base.py
+++ b/python/pyspark/ml/base.py
@@ -21,8 +21,9 @@
 import threading
 
 from pyspark import since
-from pyspark.ml.param.shared import *
 from pyspark.ml.common import inherit_doc
+from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasLabelCol, HasFeaturesCol, \
+    HasPredictionCol, Params
 from pyspark.sql.functions import udf
 from pyspark.sql.types import StructField, StructType
 
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index a223a9be4d8ea..6df425211242f 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -16,19 +16,24 @@
 #
 
 import operator
+import sys
+import uuid
 import warnings
 from abc import ABCMeta, abstractmethod, abstractproperty
 from multiprocessing.pool import ThreadPool
 
-from pyspark import keyword_only
+from pyspark import keyword_only, since, SparkContext
 from pyspark.ml import Estimator, Predictor, PredictionModel, Model
-from pyspark.ml.param.shared import *
+from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \
+    HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \
+    HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \
+    HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism
 from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \
     _TreeEnsembleModel, _RandomForestParams, _GBTParams, \
     _HasVarianceImpurity, _TreeClassifierParams
 from pyspark.ml.regression import _FactorizationMachinesParams, DecisionTreeRegressionModel
-from pyspark.ml.util import *
 from pyspark.ml.base import _PredictorParams
+from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary
 from pyspark.ml.wrapper import JavaParams, \
     JavaPredictor, JavaPredictionModel, JavaWrapper
 from pyspark.ml.common import inherit_doc, _java2py, _py2java
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index e52d2dc73a1c8..6fb14240f95d5 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -19,9 +19,12 @@
 import warnings
 
 from pyspark import since, keyword_only
-from pyspark.ml.util import *
+from pyspark.ml.param.shared import HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, \
+    HasAggregationDepth, HasWeightCol, HasTol, HasProbabilityCol, HasBlockSize, \
+    HasDistanceMeasure, HasCheckpointInterval, Param, Params, TypeConverters
+from pyspark.ml.util import JavaMLWritable, JavaMLReadable, GeneralJavaMLWritable, \
+    HasTrainingSummary, SparkContext
 from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper
-from pyspark.ml.param.shared import *
 from pyspark.ml.common import inherit_doc, _java2py
 from pyspark.ml.stat import MultivariateGaussian
 from pyspark.sql import DataFrame
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 2220293d54ba4..18ce33dee96c7 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -17,7 +17,9 @@
 
 from pyspark import since, keyword_only, SparkContext
 from pyspark.ml.linalg import _convert_to_vector
-from pyspark.ml.param.shared import *
+from pyspark.ml.param.shared import HasThreshold, HasThresholds, HasInputCol, HasOutputCol, \
+    HasInputCols, HasOutputCols, HasHandleInvalid, HasRelativeError, HasFeaturesCol, HasLabelCol, \
+    HasSeed, HasNumFeatures, HasStepSize, HasMaxIter, TypeConverters, Param, Params
 from pyspark.ml.util import JavaMLReadable, JavaMLWritable
 from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer, _jvm
 from pyspark.ml.common import inherit_doc
diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
index 37d3b6eec02d0..7c62ceed5de1e 100644
--- a/python/pyspark/ml/fpm.py
+++ b/python/pyspark/ml/fpm.py
@@ -15,11 +15,13 @@
 # limitations under the License.
 #
 
-from pyspark import keyword_only
+import sys
+
+from pyspark import keyword_only, since
 from pyspark.sql import DataFrame
-from pyspark.ml.util import *
+from pyspark.ml.util import JavaMLWritable, JavaMLReadable
 from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams
-from pyspark.ml.param.shared import *
+from pyspark.ml.param.shared import HasPredictionCol, Param, TypeConverters, Params
 
 __all__ = ["FPGrowth", "FPGrowthModel", "PrefixSpan"]
 
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index 937237fb97d95..287e9e825da05 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -14,11 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
 
-from pyspark import keyword_only
+from pyspark import keyword_only, since, SparkContext
 from pyspark.ml.base import Estimator, Model, Transformer
 from pyspark.ml.param import Param, Params
-from pyspark.ml.util import *
+from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \
+    DefaultParamsReader, DefaultParamsWriter, MLWriter, MLReader, JavaMLWritable
 from pyspark.ml.wrapper import JavaParams, JavaWrapper
 from pyspark.ml.common import inherit_doc, _java2py, _py2java
 
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 62b856046234a..e3e440c6497af 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -18,10 +18,12 @@
 import sys
 
 from pyspark import since, keyword_only
-from pyspark.ml.util import *
+from pyspark.ml.param.shared import HasPredictionCol, HasBlockSize, HasMaxIter, HasRegParam, \
+    HasCheckpointInterval, HasSeed
 from pyspark.ml.wrapper import JavaEstimator, JavaModel
-from pyspark.ml.param.shared import *
 from pyspark.ml.common import inherit_doc
+from pyspark.ml.param import Params, TypeConverters, Param
+from pyspark.ml.util import JavaMLWritable, JavaMLReadable
 
 __all__ = ['ALS', 'ALSModel']
 
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 6d88b97e8fcfa..6bd32ed1d636d 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -15,15 +15,21 @@
 # limitations under the License.
 #
 
+import sys
+
 from abc import ABCMeta
 
-from pyspark import keyword_only
+from pyspark import keyword_only, since
 from pyspark.ml import Predictor, PredictionModel
 from pyspark.ml.base import _PredictorParams
-from pyspark.ml.param.shared import *
+from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol, \
+    Param, Params, TypeConverters, HasMaxIter, HasTol, HasFitIntercept, HasAggregationDepth, \
+    HasBlockSize, HasRegParam, HasSolver, HasStepSize, HasSeed, HasElasticNetParam, \
+    HasStandardization, HasLoss, HasVarianceCol
 from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \
     _TreeEnsembleModel, _RandomForestParams, _GBTParams, _TreeRegressorParams
-from pyspark.ml.util import *
+from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary, \
+    GeneralJavaMLWritable
 from pyspark.ml.wrapper import JavaEstimator, JavaModel, \
     JavaPredictor, JavaPredictionModel, JavaWrapper
 from pyspark.ml.common import inherit_doc
diff --git a/python/pyspark/ml/tree.py b/python/pyspark/ml/tree.py
index 460c76fabc375..dfb24a2295fae 100644
--- a/python/pyspark/ml/tree.py
+++ b/python/pyspark/ml/tree.py
@@ -15,8 +15,10 @@
 # limitations under the License.
 #
 
-from pyspark.ml.param.shared import *
-from pyspark.ml.util import *
+from pyspark import since
+from pyspark.ml.param import Params
+from pyspark.ml.param.shared import HasCheckpointInterval, HasSeed, HasWeightCol, Param, \
+    TypeConverters, HasMaxIter, HasStepSize, HasValidationIndicatorCol
 from pyspark.ml.wrapper import JavaPredictionModel
 from pyspark.ml.common import inherit_doc
 
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 6a0c85089e114..4757162a75125 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -14,17 +14,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
+import sys
 import itertools
 from multiprocessing.pool import ThreadPool
 
 import numpy as np
 
-from pyspark import keyword_only
+from pyspark import keyword_only, since, SparkContext
 from pyspark.ml import Estimator, Model
 from pyspark.ml.common import _py2java, _java2py
 from pyspark.ml.param import Params, Param, TypeConverters
 from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed
-from pyspark.ml.util import *
+from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader
 from pyspark.ml.wrapper import JavaParams
 from pyspark.sql.functions import col, lit, rand, UserDefinedFunction
 from pyspark.sql.types import BooleanType
diff --git a/python/pyspark/mllib/stat/__init__.py b/python/pyspark/mllib/stat/__init__.py
index c8a721d3fe41c..0fb33061838af 100644
--- a/python/pyspark/mllib/stat/__init__.py
+++ b/python/pyspark/mllib/stat/__init__.py
@@ -19,7 +19,7 @@
 Python package for statistical functions in MLlib.
 """
 
-from pyspark.mllib.stat._statistics import *
+from pyspark.mllib.stat._statistics import Statistics, MultivariateStatisticalSummary
 from pyspark.mllib.stat.distribution import MultivariateGaussian
 from pyspark.mllib.stat.test import ChiSqTestResult
 from pyspark.mllib.stat.KernelDensity import KernelDensity
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index bd4c35576214e..8c08d5cfa692b 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -21,7 +21,7 @@
 
 from pyspark import copy_func, since
 from pyspark.context import SparkContext
-from pyspark.sql.types import *
+from pyspark.sql.types import DataType, StructField, StructType, IntegerType, StringType
 
 __all__ = ["Column"]
 
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 1027918adbe15..6ba5e9d72b19c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -31,7 +31,7 @@
 from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
 from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2
 from pyspark.sql.streaming import DataStreamWriter
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, StringType, IntegerType
 from pyspark.sql.pandas.conversion import PandasConversionMixin
 from pyspark.sql.pandas.map_ops import PandasMapOpsMixin
 
diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 83e2baa8f0002..688f8d4992b7d 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -21,7 +21,7 @@
 from pyspark.sql.column import Column, _to_seq
 from pyspark.sql.dataframe import DataFrame
 from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, IntegerType, StringType
 
 __all__ = ["GroupedData"]
 
diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py
index 3842bc2357c6c..d39a4413a0f2e 100644
--- a/python/pyspark/sql/pandas/conversion.py
+++ b/python/pyspark/sql/pandas/conversion.py
@@ -22,7 +22,8 @@
 from pyspark.rdd import _load_from_socket
 from pyspark.sql.pandas.serializers import ArrowCollectSerializer
 from pyspark.sql.types import IntegralType
-from pyspark.sql.types import *
+from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \
+    DoubleType, BooleanType, TimestampType, StructType, DataType
 from pyspark.traceback_utils import SCCallSiteSync
 
 
diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py
index 4b70c8a2e95e1..78f9daa130d59 100644
--- a/python/pyspark/sql/pandas/types.py
+++ b/python/pyspark/sql/pandas/types.py
@@ -20,7 +20,9 @@
 pandas instances during the type conversion.
 """
 
-from pyspark.sql.types import *
+from pyspark.sql.types import ByteType, ShortType, IntegerType, LongType, FloatType, \
+    DoubleType, DecimalType, StringType, BinaryType, DateType, TimestampType, ArrayType, \
+    StructType, StructField, BooleanType
 
 
 def to_arrow_type(dt):
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index a8a067875d003..982ab38f73654 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -14,12 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import sys
 
 from py4j.java_gateway import JavaClass
 
 from pyspark import RDD, since
 from pyspark.sql.column import _to_seq, _to_java_column
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType
 from pyspark.sql import utils
 from pyspark.sql.utils import to_str
 
@@ -1225,7 +1226,6 @@ def overwrite(self, condition):
         Overwrite rows matching the given filter condition with the contents of the data frame in
         the output table.
         """
-        condition = _to_java_column(column)
         self._jwriter.overwrite(condition)
 
     @since(3.1)
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 07413ff48a72b..23e3d74ebd082 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -23,7 +23,7 @@
 from pyspark import since, keyword_only
 from pyspark.sql.column import _to_seq
 from pyspark.sql.readwriter import OptionUtils, to_str
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, StringType
 from pyspark.sql.utils import ForeachBatchFunction, StreamingQueryException
 
 __all__ = ["StreamingQuery", "StreamingQueryManager", "DataStreamReader", "DataStreamWriter"]
@@ -1239,8 +1239,8 @@ def _test():
     globs = pyspark.sql.streaming.__dict__.copy()
     try:
         spark = SparkSession.builder.getOrCreate()
-    except py4j.protocol.Py4JError:
-        spark = SparkSession(sc)
+    except py4j.protocol.Py4JError:  # noqa: F821
+        spark = SparkSession(sc)  # noqa: F821
 
     globs['tempfile'] = tempfile
     globs['os'] = os
diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py
index 6859084237b89..fb4f619c8bf63 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -25,7 +25,9 @@
 from pyspark import SparkContext, SparkConf
 from pyspark.sql import Row, SparkSession
 from pyspark.sql.functions import udf
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StringType, IntegerType, LongType, \
+    FloatType, DoubleType, DecimalType, DateType, TimestampType, BinaryType, StructField, MapType, \
+    ArrayType
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
 from pyspark.testing.utils import QuietTest
@@ -495,7 +497,7 @@ def conf(cls):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_arrow import *
+    from pyspark.sql.tests.test_arrow import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py
index 99100c7a76f25..5e05a8b63b259 100644
--- a/python/pyspark/sql/tests/test_column.py
+++ b/python/pyspark/sql/tests/test_column.py
@@ -17,7 +17,7 @@
 #
 
 from pyspark.sql import Column, Row
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, LongType
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py
index 85920eef9acf4..ce22a52dc119e 100644
--- a/python/pyspark/sql/tests/test_context.py
+++ b/python/pyspark/sql/tests/test_context.py
@@ -25,7 +25,7 @@
 
 from pyspark import SparkContext, SQLContext
 from pyspark.sql import Row, SparkSession
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StringType, StructField
 from pyspark.sql.window import Window
 from pyspark.testing.utils import ReusedPySparkTestCase
 
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 747abdec670ba..d03939821a176 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -23,7 +23,8 @@
 import unittest
 
 from pyspark.sql import SparkSession, Row
-from pyspark.sql.types import *
+from pyspark.sql.types import StringType, IntegerType, DoubleType, StructType, StructField, \
+    BooleanType, DateType, TimestampType, FloatType
 from pyspark.sql.utils import AnalysisException, IllegalArgumentException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils, have_pyarrow, have_pandas, \
     pandas_requirement_message, pyarrow_requirement_message
@@ -903,7 +904,7 @@ def test_query_execution_listener_on_collect_with_arrow(self):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_dataframe import *
+    from pyspark.sql.tests.test_dataframe import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py
index 1b466e294a867..dfef8f5740050 100644
--- a/python/pyspark/sql/tests/test_datasources.py
+++ b/python/pyspark/sql/tests/test_datasources.py
@@ -19,7 +19,7 @@
 import tempfile
 
 from pyspark.sql import Row
-from pyspark.sql.types import *
+from pyspark.sql.types import IntegerType, StructField, StructType, LongType, StringType
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py
index d1e841f7d6ccc..6eb5355044bb0 100644
--- a/python/pyspark/sql/tests/test_pandas_grouped_map.py
+++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py
@@ -24,7 +24,9 @@
 from pyspark.sql import Row
 from pyspark.sql.functions import array, explode, col, lit, udf, sum, pandas_udf, PandasUDFType, \
     window
-from pyspark.sql.types import *
+from pyspark.sql.types import IntegerType, DoubleType, ArrayType, BinaryType, ByteType, \
+    LongType, DecimalType, ShortType, FloatType, StringType, BooleanType, StructType, \
+    StructField, NullType, MapType, TimestampType
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
 from pyspark.testing.utils import QuietTest
@@ -606,7 +608,7 @@ def my_pandas_udf(pdf):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_grouped_map import *
+    from pyspark.sql.tests.test_pandas_grouped_map import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py
index 97b4de3aec43c..24b98182b7fcf 100644
--- a/python/pyspark/sql/tests/test_pandas_udf.py
+++ b/python/pyspark/sql/tests/test_pandas_udf.py
@@ -18,7 +18,7 @@
 import unittest
 
 from pyspark.sql.functions import udf, pandas_udf, PandasUDFType
-from pyspark.sql.types import *
+from pyspark.sql.types import DoubleType, StructType, StructField, LongType
 from pyspark.sql.utils import ParseException, PythonException
 from pyspark.rdd import PythonEvalType
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
index 4014a70df9ea0..f63f52239fdf2 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
@@ -21,12 +21,13 @@
 from pyspark.sql import Row
 from pyspark.sql.functions import array, explode, col, lit, mean, sum, \
     udf, pandas_udf, PandasUDFType
-from pyspark.sql.types import *
+from pyspark.sql.types import ArrayType, TimestampType, DoubleType, MapType
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
 from pyspark.testing.utils import QuietTest
 
+
 if have_pandas:
     import pandas as pd
     from pandas.util.testing import assert_frame_equal
diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py
index 951cef7f9e927..522807b03af70 100644
--- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py
+++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py
@@ -28,8 +28,9 @@
 from pyspark.sql import Column
 from pyspark.sql.functions import array, col, expr, lit, sum, struct, udf, pandas_udf, \
     PandasUDFType
-from pyspark.sql.types import Row
-from pyspark.sql.types import *
+from pyspark.sql.types import IntegerType, ByteType, StructType, ShortType, BooleanType, \
+    LongType, FloatType, DoubleType, DecimalType, StringType, ArrayType, StructField, \
+    Row, TimestampType, MapType, DateType, BinaryType
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, test_compiled,\
     test_not_compiled_message, have_pandas, have_pyarrow, pandas_requirement_message, \
@@ -1138,7 +1139,7 @@ def test_datasource_with_udf(self):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_pandas_udf_scalar import *
+    from pyspark.sql.tests.test_pandas_udf_scalar import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py
index 44b37ac1ac48c..55ffefc43c105 100644
--- a/python/pyspark/sql/tests/test_readwriter.py
+++ b/python/pyspark/sql/tests/test_readwriter.py
@@ -21,7 +21,7 @@
 
 from pyspark.sql.functions import col
 from pyspark.sql.readwriter import DataFrameWriterV2
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, StringType
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
index 6508a0f09f9d8..35c14e430af50 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -22,7 +22,7 @@
 
 from pyspark.sql import Row
 from pyspark.sql.functions import lit
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, DecimalType, BinaryType
 from pyspark.testing.sqlutils import ReusedSQLTestCase, UTCOffsetTimezone
 
 
diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py
index 34ff92b323c73..21ce04618a904 100644
--- a/python/pyspark/sql/tests/test_streaming.py
+++ b/python/pyspark/sql/tests/test_streaming.py
@@ -21,7 +21,7 @@
 import time
 
 from pyspark.sql.functions import lit
-from pyspark.sql.types import *
+from pyspark.sql.types import StructType, StructField, IntegerType, StringType
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 68e4de83825cc..7256db055fb9c 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -26,7 +26,9 @@
 
 from pyspark.sql import Row
 from pyspark.sql.functions import col, UserDefinedFunction
-from pyspark.sql.types import *
+from pyspark.sql.types import ByteType, ShortType, IntegerType, FloatType, DateType, \
+    TimestampType, MapType, StringType, StructType, StructField, ArrayType, DoubleType, LongType, \
+    DecimalType, BinaryType, BooleanType, NullType
 from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings, \
     _array_unsigned_int_typecode_ctype_mappings, _infer_type, _make_type_verifier, _merge_type
 from pyspark.testing.sqlutils import ReusedSQLTestCase, ExamplePointUDT, PythonOnlyUDT, \
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index ff92e1e97ee09..ad94bc83cc5be 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -24,7 +24,8 @@
 from pyspark import SparkContext
 from pyspark.sql import SparkSession, Column, Row
 from pyspark.sql.functions import UserDefinedFunction, udf
-from pyspark.sql.types import *
+from pyspark.sql.types import StringType, IntegerType, BooleanType, DoubleType, LongType, \
+    ArrayType, StructType, StructField
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, test_compiled, test_not_compiled_message
 from pyspark.testing.utils import QuietTest
@@ -283,7 +284,6 @@ def test_broadcast_in_udf(self):
     def test_udf_with_filter_function(self):
         df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
         from pyspark.sql.functions import col
-        from pyspark.sql.types import BooleanType
 
         my_filter = udf(lambda a: a < 2, BooleanType())
         sel = df.select(col("key"), col("value")).filter((my_filter(col("key"))) & (df.value < "2"))
@@ -292,7 +292,6 @@ def test_udf_with_aggregate_function(self):
         df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
         from pyspark.sql.functions import col, sum
-        from pyspark.sql.types import BooleanType
 
         my_filter = udf(lambda a: a == 1, BooleanType())
         sel = df.select(col("key")).distinct().filter(my_filter(col("key")))
@@ -465,7 +464,6 @@ def test_udf_shouldnt_accept_noncallable_object(self):
 
     def test_udf_with_decorator(self):
         from pyspark.sql.functions import lit
-        from pyspark.sql.types import IntegerType, DoubleType
 
         @udf(IntegerType())
         def add_one(x):
@@ -521,8 +519,6 @@ def as_double(x):
         )
 
     def test_udf_wrapper(self):
-        from pyspark.sql.types import IntegerType
-
         def f(x):
             """Identity"""
             return x
@@ -700,7 +696,7 @@ def test_udf_init_shouldnt_initialize_context(self):
 
 
 if __name__ == "__main__":
-    from pyspark.sql.tests.test_udf import *
+    from pyspark.sql.tests.test_udf import *  # noqa: F401
 
     try:
         import xmlrunner
diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py
index bffd78a5012aa..719d303dda1a6 100644
--- a/python/pyspark/tests/test_serializers.py
+++ b/python/pyspark/tests/test_serializers.py
@@ -19,10 +19,10 @@
 import unittest
 
 from pyspark import serializers
-from pyspark.serializers import *
 from pyspark.serializers import CloudPickleSerializer, CompressedSerializer, \
     AutoBatchedSerializer, BatchedSerializer, AutoSerializer, NoOpSerializer, PairDeserializer, \
-    FlattenedValuesSerializer, CartesianDeserializer
+    FlattenedValuesSerializer, CartesianDeserializer, PickleSerializer, UTF8Deserializer, \
+    MarshalSerializer
 from pyspark.testing.utils import PySparkTestCase, read_int, write_int, ByteArrayOutput, \
     have_numpy, have_scipy
 
@@ -227,7 +227,7 @@ def test_chunked_stream(self):
 
 
 if __name__ == "__main__":
-    from pyspark.tests.test_serializers import *
+    from pyspark.tests.test_serializers import *  # noqa: F401
 
     try:
         import xmlrunner