From 0501123dda092e3a48b8c0679932f6a6cbe7e464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobiasz=20K=C4=99dzierski?= Date: Thu, 30 Jul 2020 19:20:46 +0200 Subject: [PATCH 1/2] [BEAM-10623] Add workflow to run python tests on Linux/Windows/Mac --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/build_wheels.yml | 3 + .github/workflows/python_tests.yml | 218 ++++++++++++++++++ CI.md | 36 ++- README.md | 1 + scripts/ci/ci_check_are_gcp_variables_set.sh | 2 +- .../dataframe/pandas_doctests_test.py | 1 + sdks/python/apache_beam/io/parquetio_test.py | 18 +- .../interactive/interactive_beam_test.py | 2 + .../interactive/interactive_runner_test.py | 2 + .../portability/portable_runner_test.py | 1 + .../runners/worker/log_handler_test.py | 2 +- .../testing/datatype_inference_test.py | 2 + .../typehints/typecheck_test_py3.py | 32 +-- sdks/python/gen_protos.py | 2 + sdks/python/tox.ini | 33 ++- 16 files changed, 321 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/python_tests.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index dae4d0c3641f..248d1c20905a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -35,5 +35,6 @@ See [.test-infra/jenkins/README](https://github.com/apache/beam/blob/master/.tes GitHub Actions Tests Status (on master branch) ------------------------------------------------------------------------------------------------ ![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg) +![Python tests](https://github.com/apache/beam/workflows/Python%20tests/badge.svg) See [CI.md](https://github.com/apache/beam/blob/master/CI.md) for more information about GitHub Actions CI. 
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index c5b22842a1cc..f549751e6089 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -50,6 +50,9 @@ jobs: env: GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }} GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} + GCP_PROJECT_ID: "not-needed-here" + GCP_REGION: "not-needed-here" + GCP_TESTING_BUCKET: "not-needed-here" build_source: runs-on: ubuntu-latest diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml new file mode 100644 index 000000000000..155df3cedd1d --- /dev/null +++ b/.github/workflows/python_tests.yml @@ -0,0 +1,218 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# To learn more about GitHub Actions in Apache Beam check the CI.md + +name: Python tests + +on: + schedule: + - cron: '10 2 * * *' + push: + branches: ['master', 'release-*'] + tags: 'v*' + pull_request: + branches: ['master', 'release-*'] + tags: 'v*' + paths: ['sdks/python/**', 'model/**'] + workflow_dispatch: + inputs: + runDataflow: + description: 'Type "true" if you want to run Dataflow tests (GCP variables must be configured, check CI.md)' + default: false + + +jobs: + + check_gcp_variables: + timeout-minutes: 5 + name: "Check GCP variables" + runs-on: ubuntu-latest + outputs: + gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} + steps: + - uses: actions/checkout@v2 + - name: "Check are GCP variables set" + run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" + id: check_gcp_variables + env: + GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} + GCP_REGION: ${{ secrets.GCP_REGION }} + GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }} + GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} + GCP_TESTING_BUCKET: ${{ secrets.GCP_TESTING_BUCKET }} + + build_python_sdk_source: + name: 'Build python source distribution' + if: | + needs.check_gcp_variables.outputs.gcp-variables-set == 'true' && ( + (github.event_name == 'push' || github.event_name == 'schedule') || + (github.event_name == 'workflow_dispatch' && github.event.inputs.runDataflow == 'true') + ) + needs: + - check_gcp_variables + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Install python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Get build dependencies + working-directory: ./sdks/python + run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt + - name: Build source + working-directory: ./sdks/python + run: python setup.py sdist + - name: Rename source file + working-directory: ./sdks/python/dist + run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz + - name: Upload compressed 
sources as artifacts + uses: actions/upload-artifact@v2 + with: + name: python_sdk_source + path: sdks/python/dist/apache-beam-source.tar.gz + + python_unit_tests: + name: 'Python Unit Tests' + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + params: [ + {"py_ver": "3.5", "tox_env": "py35"}, + {"py_ver": "3.6", "tox_env": "py36"}, + {"py_ver": "3.7", "tox_env": "py37"}, + {"py_ver": "3.8", "tox_env": "py38"}, + ] + exclude: + # TODO remove exclusion after issue with protobuf is solved + # https://github.com/protocolbuffers/protobuf/issues/7765 + - os: windows-latest + params: {"py_ver": "3.8", "tox_env": "py38"} + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Install python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.params.py_ver }} + - name: Get build dependencies + working-directory: ./sdks/python + run: pip install -r build-requirements.txt + - name: Install tox + run: pip install tox + - name: Run tests basic unix + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos') + working-directory: ./sdks/python + run: tox -c tox.ini -e ${{ matrix.params.tox_env }} + - name: Run tests basic windows + if: startsWith(matrix.os, 'windows') + working-directory: ./sdks/python + run: tox -c tox.ini -e ${{ matrix.params.tox_env }}-win + - name: Upload test logs + uses: actions/upload-artifact@v2 + if: always() + with: + name: pytest-${{matrix.os}}-${{matrix.params.py_ver}} + path: sdks/python/pytest**.xml + + python_wordcount_direct_runner: + name: 'Python Wordcount Direct Runner' + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python: [3.5, 3.6, 3.7, 3.8] + exclude: + # TODO remove exclusion after issue with protobuf is solved + # https://github.com/protocolbuffers/protobuf/issues/7765 + - os: windows-latest + python: 3.8 + steps: + - name: Checkout code + uses: 
actions/checkout@v2 + - name: Install python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Get build dependencies + working-directory: ./sdks/python + run: pip install -r build-requirements.txt + - name: Install requirements + working-directory: ./sdks/python + run: pip install setuptools --upgrade && pip install -e . + - name: Run WordCount + working-directory: ./sdks/python + shell: bash + run: python -m apache_beam.examples.wordcount --input MANIFEST.in --output counts + + python_wordcount_dataflow: + name: 'Python Wordcount Dataflow' + needs: + - build_python_sdk_source + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python: [3.5, 3.6, 3.7, 3.8] + exclude: + # TODO remove exclusion after issue with protobuf is solved + # https://github.com/protocolbuffers/protobuf/issues/7765 + - os: windows-latest + python: 3.8 + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Install python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Download source from artifacts + uses: actions/download-artifact@v2 + with: + name: python_sdk_source + path: apache-beam-source + - name: Authenticate on GCP + uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + with: + service_account_email: ${{ secrets.GCP_SA_EMAIL }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + export_default_credentials: true + - name: Get build dependencies + working-directory: ./sdks/python + run: pip install -r build-requirements.txt + - name: Install requirements + working-directory: ./sdks/python + run: pip install setuptools --upgrade && pip install -e ".[gcp]" + - name: Run WordCount + working-directory: ./sdks/python + shell: bash + run: | + python -m apache_beam.examples.wordcount \ + --input gs://dataflow-samples/shakespeare/kinglear.txt \ + --output gs://${{ 
secrets.GCP_TESTING_BUCKET }}/python_wordcount_dataflow/counts \ + --runner DataflowRunner \ + --project ${{ secrets.GCP_PROJECT_ID }} \ + --region ${{ secrets.GCP_REGION }} \ + --temp_location gs://${{ secrets.GCP_TESTING_BUCKET }}/tmp/python_wordcount_dataflow/ \ + --sdk_location ../../apache-beam-source/apache-beam-source.tar.gz diff --git a/CI.md b/CI.md index e3a77801fd4d..3007449b4263 100644 --- a/CI.md +++ b/CI.md @@ -75,6 +75,25 @@ run categories. Here is a summary of the run categories with regards of the jobs Those jobs often have matrix run strategy which runs several different variations of the jobs (with different platform type / Python version to run for example) +### Google Cloud Platform Credentials + +Some of the jobs require variables stored as [GitHub Secrets](https://docs.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets) +to perform operations on Google Cloud Platform. +These variables are: + * `GCP_PROJECT_ID` - ID of the Google Cloud project. For example: `apache-beam-testing`. + * `GCP_REGION` - Region of the bucket and dataflow jobs. For example: `us-central1`. + * `GCP_TESTING_BUCKET` - Name of the bucket where temporary files for Dataflow tests will be stored. For example: `beam-github-actions-tests`. + * `GCP_SA_EMAIL` - Service account email address. This is usually of the format `@.iam.gserviceaccount.com`. + * `GCP_SA_KEY` - Service account key. This key should be created and encoded as a Base64 string (eg. `cat my-key.json | base64` on macOS). 
+ +Service Account shall have following permissions ([IAM roles](https://cloud.google.com/iam/docs/understanding-roles)): + * Storage Admin (roles/storage.admin) + * Dataflow Admin (roles/dataflow.admin) + +### Workflows + +#### Build python source distribution and wheels - [build_wheels.yml](.github/workflows/build_wheels.yml) + | Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials | |-------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------|---------------|--------------------------| | Check GCP variables | Checks that GCP variables are set. Jobs which required them depend on the output of this job. | Yes | Yes | Yes | Yes/No | @@ -86,16 +105,15 @@ Those jobs often have matrix run strategy which runs several different variation | List files on Google Cloud Storage Bucket | Lists files on GCS for verification purpose. | - | Yes | Yes | Yes | | Tag repo nightly | Tag repo with `nightly-master` tag if build python source distribution and python wheels finished successfully. | - | - | Yes | - | -### Google Cloud Platform Credentials - -Some of the jobs require variables stored as [GitHub Secrets](https://docs.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets) -to perform operations on Google Cloud Platform. -These variables are: - * `GCP_SA_EMAIL` - Service account email address. This is usually of the format `@.iam.gserviceaccount.com`. - * `GCP_SA_KEY` - Service account key. This key should be created and encoded as a Base64 string (eg. `cat my-key.json | base64` on macOS). 
+#### Python tests - [python_tests.yml](.github/workflows/python_tests.yml) -Service Account shall have following permissions ([IAM roles](https://cloud.google.com/iam/docs/understanding-roles)): - * Storage Object Admin (roles/storage.objectAdmin) +| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials | +|----------------------------------|-----------------------------------------------------------------------------------------------------------------------|------------------|-----------------------|---------------|--------------------------| +| Check GCP variables | Checks that GCP variables are set. Jobs which required them depend on the output of this job. | Yes | Yes | Yes | Yes/No | +| Build python source distribution | Builds python source distribution and uploads it to artifacts. Artifacts are used in `Python Wordcount Dataflow` job. | - | Yes | Yes | Yes | +| Python Unit Tests | Runs python unit tests. | Yes | Yes | Yes | - | +| Python Wordcount Direct Runner | Runs python WordCount example with Direct Runner. | Yes | Yes | Yes | - | +| Python Wordcount Dataflow | Runs python WordCount example with DataFlow Runner. 
| - | Yes | Yes | Yes | ### GitHub Action Tips diff --git a/README.md b/README.md index 14327127c971..a2e037c8f115 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ [![Compat Check PyPI](https://python-compatibility-tools.appspot.com/one_badge_image?package=apache-beam%5Bgcp%5D)](https://python-compatibility-tools.appspot.com/one_badge_target?package=apache-beam%5Bgcp%5D) [![Compat Check at master](https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python)](https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python) ![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg) +![Python tests](https://github.com/apache/beam/workflows/Python%20tests/badge.svg) ### Post-commit tests status (on master branch) diff --git a/scripts/ci/ci_check_are_gcp_variables_set.sh b/scripts/ci/ci_check_are_gcp_variables_set.sh index 15c269fa829c..9cdcec09f71f 100755 --- a/scripts/ci/ci_check_are_gcp_variables_set.sh +++ b/scripts/ci/ci_check_are_gcp_variables_set.sh @@ -34,7 +34,7 @@ function check_vars() { $ret } -if ! check_vars "GCP_SA_EMAIL" "GCP_SA_KEY"; then +if ! check_vars "GCP_PROJECT_ID" "GCP_REGION" "GCP_SA_EMAIL" "GCP_SA_KEY" "GCP_TESTING_BUCKET"; then echo "::set-output name=gcp-variables-set::false" echo >&2 "!!! WARNING !!!" echo >&2 "Not all GCP variables are set. Jobs which require them will be skipped." 
diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py index d11a56b606af..e893ba04e5e3 100644 --- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py +++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py @@ -26,6 +26,7 @@ @unittest.skipIf(sys.version_info <= (3, ), 'Requires contextlib.ExitStack.') @unittest.skipIf(sys.version_info < (3, 6), 'Nondeterministic dict ordering.') +@unittest.skipIf(sys.platform == 'win32', '[BEAM-10626]') class DoctestTest(unittest.TestCase): def test_dataframe_tests(self): result = doctests.testmod( diff --git a/sdks/python/apache_beam/io/parquetio_test.py b/sdks/python/apache_beam/io/parquetio_test.py index 364ba7427d3e..49707df3afdf 100644 --- a/sdks/python/apache_beam/io/parquetio_test.py +++ b/sdks/python/apache_beam/io/parquetio_test.py @@ -48,6 +48,8 @@ from apache_beam.testing.util import equal_to from apache_beam.transforms.display import DisplayData from apache_beam.transforms.display_test import DisplayDataItemMatcher +# TODO(BEAM-8371): Use tempfile.TemporaryDirectory. 
+from apache_beam.utils.subprocess_server_test import TemporaryDirectory try: import pyarrow as pa @@ -296,8 +298,8 @@ def test_sink_transform_int96(self): path, self.SCHEMA96, num_shards=1, shard_name_template='') def test_sink_transform(self): - with tempfile.NamedTemporaryFile() as dst: - path = dst.name + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, "tmp_filename") with TestPipeline() as p: _ = p \ | Create(self.RECORDS) \ @@ -312,8 +314,8 @@ def test_sink_transform(self): assert_that(readback, equal_to([json.dumps(r) for r in self.RECORDS])) def test_batched_read(self): - with tempfile.NamedTemporaryFile() as dst: - path = dst.name + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, "tmp_filename") with TestPipeline() as p: _ = p \ | Create(self.RECORDS, reshuffle=False) \ @@ -334,8 +336,8 @@ def test_batched_read(self): param(compression_type='zstd') ]) def test_sink_transform_compressed(self, compression_type): - with tempfile.NamedTemporaryFile() as dst: - path = dst.name + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, "tmp_filename") with TestPipeline() as p: _ = p \ | Create(self.RECORDS) \ @@ -450,8 +452,8 @@ def test_selective_columns(self): self._run_parquet_test(file_name, ['name'], None, False, expected_result) def test_sink_transform_multiple_row_group(self): - with tempfile.NamedTemporaryFile() as dst: - path = dst.name + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, "tmp_filename") with TestPipeline() as p: # writing 623200 bytes of data _ = p \ diff --git a/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py b/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py index ff9f56a71e4a..3fab1e12d652 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_beam_test.py @@ -86,6 +86,7 @@ def 
test_watch_class_instance(self): test_env.watch(self) self.assertEqual(ie.current_env().watching(), test_env.watching()) + @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]") def test_show_always_watch_given_pcolls(self): p = beam.Pipeline(ir.InteractiveRunner()) # pylint: disable=range-builtin-not-iterating @@ -96,6 +97,7 @@ def test_show_always_watch_given_pcolls(self): ib.show(pcoll) self.assertTrue(pcoll in _get_watched_pcollections_with_variable_names()) + @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]") def test_show_mark_pcolls_computed_when_done(self): p = beam.Pipeline(ir.InteractiveRunner()) # pylint: disable=range-builtin-not-iterating diff --git a/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py b/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py index 51cfeb727b17..00d2f7f8e4b4 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_runner_test.py @@ -66,6 +66,7 @@ class InteractiveRunnerTest(unittest.TestCase): def setUp(self): ie.new_env() + @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]") def test_basic(self): p = beam.Pipeline( runner=interactive_runner.InteractiveRunner( @@ -83,6 +84,7 @@ def test_basic(self): _ = pc0 | 'Print3' >> beam.Map(print_with_message('Run3')) p.run().wait_until_finish() + @unittest.skipIf(sys.platform == "win32", "[BEAM-10627]") def test_wordcount(self): class WordExtractingDoFn(beam.DoFn): def process(self, element): diff --git a/sdks/python/apache_beam/runners/portability/portable_runner_test.py b/sdks/python/apache_beam/runners/portability/portable_runner_test.py index c9a33c15bb25..77da9c15e44c 100644 --- a/sdks/python/apache_beam/runners/portability/portable_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/portable_runner_test.py @@ -284,6 +284,7 @@ def create_options(self): return options +@pytest.mark.skipif(sys.platform == "win32", 
reason="[BEAM-10625]") class PortableRunnerTestWithSubprocesses(PortableRunnerTest): _use_subprocesses = True diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 80fe543e3335..dcae3f67be62 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -87,7 +87,7 @@ def _verify_fn_log_handler(self, num_log_entries): self.assertEqual( '%s: %s' % (msg, num_received_log_entries), log_entry.message) self.assertTrue( - re.match(r'.*/log_handler_test.py:\d+', log_entry.log_location), + re.match(r'.*log_handler_test.py:\d+', log_entry.log_location), log_entry.log_location) self.assertGreater(log_entry.timestamp.seconds, 0) self.assertGreaterEqual(log_entry.timestamp.nanos, 0) diff --git a/sdks/python/apache_beam/testing/datatype_inference_test.py b/sdks/python/apache_beam/testing/datatype_inference_test.py index 2c2bf35f391b..8578bc9f8023 100644 --- a/sdks/python/apache_beam/testing/datatype_inference_test.py +++ b/sdks/python/apache_beam/testing/datatype_inference_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import import logging +import sys import unittest from collections import OrderedDict @@ -174,6 +175,7 @@ def test_infer_typehints_schema(self, _, data, schema): @parameterized.expand([(d["name"], d["data"], d["pyarrow_schema"]) for d in TEST_DATA]) @unittest.skipIf(pa is None, "PyArrow is not installed") + @unittest.skipIf(sys.platform == "win32", "[BEAM-10624]") def test_infer_pyarrow_schema(self, _, data, schema): pyarrow_schema = datatype_inference.infer_pyarrow_schema(data) self.assertEqual(pyarrow_schema, schema) diff --git a/sdks/python/apache_beam/typehints/typecheck_test_py3.py b/sdks/python/apache_beam/typehints/typecheck_test_py3.py index d6a655fbb2ad..719d53d91cd3 100644 --- a/sdks/python/apache_beam/typehints/typecheck_test_py3.py +++ 
b/sdks/python/apache_beam/typehints/typecheck_test_py3.py @@ -25,6 +25,7 @@ from __future__ import absolute_import +import os import tempfile import unittest from typing import Iterable @@ -35,6 +36,8 @@ from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to from apache_beam.typehints import decorators +# TODO(BEAM-8371): Use tempfile.TemporaryDirectory. +from apache_beam.utils.subprocess_server_test import TemporaryDirectory decorators._enable_from_callable = True @@ -92,23 +95,24 @@ def test_wrapper_pass_through(self): # We use a file to check the result because the MyDoFn instance passed is # not the same one that actually runs in the pipeline (it is serialized # here and deserialized in the worker). - with tempfile.NamedTemporaryFile(mode='w+t') as f: - dofn = MyDoFn(f.name) + with TemporaryDirectory() as tmp_dirname: + path = os.path.join(tmp_dirname, "tmp_filename") + dofn = MyDoFn(path) result = self.p | beam.Create([1, 2, 3]) | beam.ParDo(dofn) assert_that(result, equal_to([1, 2, 3])) self.p.run() - f.seek(0) - lines = [line.strip() for line in f] - self.assertListEqual([ - 'setup', - 'start_bundle', - 'process', - 'process', - 'process', - 'finish_bundle', - 'teardown', - ], - lines) + with open(path, mode="r") as ft: + lines = [line.strip() for line in ft] + self.assertListEqual([ + 'setup', + 'start_bundle', + 'process', + 'process', + 'process', + 'finish_bundle', + 'teardown', + ], + lines) def test_wrapper_pipeline_type_check(self): # Verifies that type hints are not masked by the wrapper. 
What actually diff --git a/sdks/python/gen_protos.py b/sdks/python/gen_protos.py index ba2b0fa3e8e3..76458b0dd945 100644 --- a/sdks/python/gen_protos.py +++ b/sdks/python/gen_protos.py @@ -220,6 +220,8 @@ def _find_protoc_gen_mypy(): # NOTE: this shouldn't be necessary if the virtualenv's environment # is passed to tasks below it, since protoc will search the PATH itself fname = 'protoc-gen-mypy' + if platform.system() == 'Windows': + fname += ".exe" pathstr = os.environ.get('PATH') search_paths = pathstr.split(os.pathsep) if pathstr else [] diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index ba73ca17ea68..637970cd8cf9 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,6 +33,7 @@ extras = test whitelist_externals = false time + bash deps = cython: cython==0.28.1 -r build-requirements.txt @@ -51,9 +52,9 @@ commands_pre = python --version pip --version pip check - {toxinidir}/scripts/run_tox_cleanup.sh + bash {toxinidir}/scripts/run_tox_cleanup.sh commands_post = - {toxinidir}/scripts/run_tox_cleanup.sh + bash {toxinidir}/scripts/run_tox_cleanup.sh commands = false {envname} is misconfigured [testenv:py27] @@ -81,6 +82,34 @@ commands = python apache_beam/examples/complete/autocomplete_test.py {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +[testenv:py35-win] +commands = + python apache_beam/examples/complete/autocomplete_test.py + bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} +list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze + +[testenv:py36-win] +commands = + python apache_beam/examples/complete/autocomplete_test.py + bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} +list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze + +[testenv:py37-win] +commands = + 
python apache_beam/examples/complete/autocomplete_test.py + bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} +list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze + +[testenv:py38-win] +commands = + python apache_beam/examples/complete/autocomplete_test.py + bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +install_command = {envbindir}/python.exe {envbindir}/pip.exe install --retries 10 {opts} {packages} +list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze + [testenv:py27-cython] # cython tests are only expected to work in linux (2.x and 3.x) # If we want to add other platforms in the future, it should be: From 7b19dc5618969195d64d74d426fa904e23530619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobiasz=20K=C4=99dzierski?= Date: Fri, 7 Aug 2020 21:12:18 +0200 Subject: [PATCH 2/2] [BEAM-10624] dtype explicit for the numpy arrays --- sdks/python/apache_beam/testing/datatype_inference_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/testing/datatype_inference_test.py b/sdks/python/apache_beam/testing/datatype_inference_test.py index 8578bc9f8023..c4a429746ab9 100644 --- a/sdks/python/apache_beam/testing/datatype_inference_test.py +++ b/sdks/python/apache_beam/testing/datatype_inference_test.py @@ -19,7 +19,6 @@ from __future__ import absolute_import import logging -import sys import unittest from collections import OrderedDict @@ -55,7 +54,7 @@ ("a", 1), ("b", 0.12345), ("c", u"Hello World!!"), - ("d", np.array([1, 2, 3])), + ("d", np.array([1, 2, 3], dtype=np.int64)), ("e", b"some bytes"), ]), OrderedDict([ @@ -66,7 +65,7 @@ OrderedDict([ ("a", 100000), ("c", u"XoXoX"), - ("d", np.array([4, 5, 6])), + ("d", np.array([4, 5, 6], dtype=np.int64)), ("e", b""), ]), ], @@ -175,7 +174,6 @@ def test_infer_typehints_schema(self, _, data, schema): 
@parameterized.expand([(d["name"], d["data"], d["pyarrow_schema"]) for d in TEST_DATA]) @unittest.skipIf(pa is None, "PyArrow is not installed") - @unittest.skipIf(sys.platform == "win32", "[BEAM-10624]") def test_infer_pyarrow_schema(self, _, data, schema): pyarrow_schema = datatype_inference.infer_pyarrow_schema(data) self.assertEqual(pyarrow_schema, schema)