Skip to content

Commit

Permalink
Merge pull request #12452: [BEAM-10623] Add workflow to run Beam pyth…
Browse files Browse the repository at this point in the history
…on tests on Linux/Windows/Mac platforms
  • Loading branch information
tvalentyn authored Aug 18, 2020
2 parents 154a383 + 7b19dc5 commit c99390a
Show file tree
Hide file tree
Showing 16 changed files with 321 additions and 37 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ See [.test-infra/jenkins/README](https://github.com/apache/beam/blob/master/.tes
GitHub Actions Tests Status (on master branch)
------------------------------------------------------------------------------------------------
![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg)
![Python tests](https://github.com/apache/beam/workflows/Python%20tests/badge.svg)

See [CI.md](https://github.com/apache/beam/blob/master/CI.md) for more information about GitHub Actions CI.
3 changes: 3 additions & 0 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ jobs:
env:
GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }}
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
GCP_PROJECT_ID: "not-needed-here"
GCP_REGION: "not-needed-here"
GCP_TESTING_BUCKET: "not-needed-here"

build_source:
runs-on: ubuntu-latest
Expand Down
218 changes: 218 additions & 0 deletions .github/workflows/python_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# To learn more about GitHub Actions in Apache Beam check the CI.md

name: Python tests

# Triggers: a nightly schedule, pushes to master/release branches and version
# tags, pull requests that touch the Python SDK or the model, and manual runs.
on:
  schedule:
    # Every day at 02:10 (GitHub evaluates cron schedules in UTC).
    - cron: '10 2 * * *'
  push:
    branches: ['master', 'release-*']
    tags: ['v*']
  pull_request:
    # Pull requests are filtered by base branch and changed paths only. A
    # `tags` filter has no effect here because a PR never targets a tag.
    branches: ['master', 'release-*']
    paths: ['sdks/python/**', 'model/**']
  workflow_dispatch:
    inputs:
      runDataflow:
        description: 'Type "true" if you want to run Dataflow tests (GCP variables must be configured, check CI.md)'
        # The job condition compares this input against the string 'true',
        # so the default is spelled as a string too rather than a YAML boolean.
        default: 'false'


jobs:

  # Runs the check script with all GCP secrets exposed as environment
  # variables and republishes its `gcp-variables-set` output, which the
  # GCP-dependent job below uses as a gate.
  check_gcp_variables:
    timeout-minutes: 5
    name: "Check GCP variables"
    runs-on: ubuntu-latest
    outputs:
      gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }}
    steps:
      - uses: actions/checkout@v2
      - name: "Check are GCP variables set"
        run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
        id: check_gcp_variables
        env:
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
          GCP_REGION: ${{ secrets.GCP_REGION }}
          GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }}
          GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
          GCP_TESTING_BUCKET: ${{ secrets.GCP_TESTING_BUCKET }}

  # Builds the source distribution once and uploads it as the
  # `python_sdk_source` artifact consumed by `python_wordcount_dataflow`.
  # Runs only when the GCP variables are set and the event is a push, a
  # scheduled run, or a manual run with runDataflow set to 'true'.
  build_python_sdk_source:
    name: 'Build python source distribution'
    if: |
      needs.check_gcp_variables.outputs.gcp-variables-set == 'true' && (
        (github.event_name == 'push' || github.event_name == 'schedule') ||
        (github.event_name == 'workflow_dispatch' && github.event.inputs.runDataflow == 'true')
      )
    needs:
      - check_gcp_variables
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always a string (an unquoted 3.10 would
          # be parsed as the float 3.1).
          python-version: '3.7'
      - name: Get build dependencies
        working-directory: ./sdks/python
        run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt
      - name: Build source
        working-directory: ./sdks/python
        run: python setup.py sdist
      # Give the tarball a version-independent name so later steps can refer
      # to it with a fixed path.
      - name: Rename source file
        working-directory: ./sdks/python/dist
        run: mv $(ls | grep "apache-beam.*tar\.gz") apache-beam-source.tar.gz
      - name: Upload compressed sources as artifacts
        uses: actions/upload-artifact@v2
        with:
          name: python_sdk_source
          path: sdks/python/dist/apache-beam-source.tar.gz

  # Runs the tox unit-test environment for every OS / Python version pair.
  python_unit_tests:
    name: 'Python Unit Tests'
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # Each entry pairs an interpreter version with its tox environment.
        params:
          - py_ver: '3.5'
            tox_env: py35
          - py_ver: '3.6'
            tox_env: py36
          - py_ver: '3.7'
            tox_env: py37
          - py_ver: '3.8'
            tox_env: py38
        exclude:
          # TODO remove exclusion after issue with protobuf is solved
          # https://github.com/protocolbuffers/protobuf/issues/7765
          - os: windows-latest
            params:
              py_ver: '3.8'
              tox_env: py38
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.params.py_ver }}
      - name: Get build dependencies
        working-directory: ./sdks/python
        run: pip install -r build-requirements.txt
      - name: Install tox
        run: pip install tox
      - name: Run tests basic unix
        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
        working-directory: ./sdks/python
        run: tox -c tox.ini -e ${{ matrix.params.tox_env }}
      # Windows uses the dedicated `-win` variant of the tox environment.
      - name: Run tests basic windows
        if: startsWith(matrix.os, 'windows')
        working-directory: ./sdks/python
        run: tox -c tox.ini -e ${{ matrix.params.tox_env }}-win
      # always() keeps the upload running when the test step failed.
      - name: Upload test logs
        uses: actions/upload-artifact@v2
        if: always()
        with:
          name: pytest-${{matrix.os}}-${{matrix.params.py_ver}}
          path: sdks/python/pytest**.xml

  # Installs the SDK in editable mode and runs the WordCount example with the
  # default runner options on every OS / Python version pair.
  python_wordcount_direct_runner:
    name: 'Python Wordcount Direct Runner'
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # Versions are quoted so they stay strings (3.10 would become 3.1).
        python: ['3.5', '3.6', '3.7', '3.8']
        exclude:
          # TODO remove exclusion after issue with protobuf is solved
          # https://github.com/protocolbuffers/protobuf/issues/7765
          - os: windows-latest
            python: '3.8'
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
      - name: Get build dependencies
        working-directory: ./sdks/python
        run: pip install -r build-requirements.txt
      - name: Install requirements
        working-directory: ./sdks/python
        run: pip install setuptools --upgrade && pip install -e .
      - name: Run WordCount
        working-directory: ./sdks/python
        shell: bash
        run: python -m apache_beam.examples.wordcount --input MANIFEST.in --output counts

  # Runs the WordCount example on Dataflow using the source distribution built
  # by `build_python_sdk_source`. Because of the `needs` dependency this job is
  # skipped whenever that job is skipped.
  python_wordcount_dataflow:
    name: 'Python Wordcount Dataflow'
    needs:
      - build_python_sdk_source
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # Versions are quoted so they stay strings (3.10 would become 3.1).
        python: ['3.5', '3.6', '3.7', '3.8']
        exclude:
          # TODO remove exclusion after issue with protobuf is solved
          # https://github.com/protocolbuffers/protobuf/issues/7765
          - os: windows-latest
            python: '3.8'
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
      - name: Download source from artifacts
        uses: actions/download-artifact@v2
        with:
          name: python_sdk_source
          path: apache-beam-source
      # NOTE(review): this action is referenced by the moving `master` ref
      # while it receives the service account key; consider pinning it to a
      # release tag or commit SHA.
      - name: Authenticate on GCP
        uses: GoogleCloudPlatform/github-actions/setup-gcloud@master
        with:
          service_account_email: ${{ secrets.GCP_SA_EMAIL }}
          service_account_key: ${{ secrets.GCP_SA_KEY }}
          project_id: ${{ secrets.GCP_PROJECT_ID }}
          export_default_credentials: true
      - name: Get build dependencies
        working-directory: ./sdks/python
        run: pip install -r build-requirements.txt
      - name: Install requirements
        working-directory: ./sdks/python
        run: pip install setuptools --upgrade && pip install -e ".[gcp]"
      # NOTE(review): every matrix entry writes to the same --output and
      # --temp_location prefixes; confirm that concurrent runs sharing them
      # is acceptable.
      # --sdk_location is relative to ./sdks/python and points at the
      # artifact downloaded into ./apache-beam-source above.
      - name: Run WordCount
        working-directory: ./sdks/python
        shell: bash
        run: |
          python -m apache_beam.examples.wordcount \
            --input gs://dataflow-samples/shakespeare/kinglear.txt \
            --output gs://${{ secrets.GCP_TESTING_BUCKET }}/python_wordcount_dataflow/counts \
            --runner DataflowRunner \
            --project ${{ secrets.GCP_PROJECT_ID }} \
            --region ${{ secrets.GCP_REGION }} \
            --temp_location gs://${{ secrets.GCP_TESTING_BUCKET }}/tmp/python_wordcount_dataflow/ \
            --sdk_location ../../apache-beam-source/apache-beam-source.tar.gz
36 changes: 27 additions & 9 deletions CI.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,25 @@ run categories. Here is a summary of the run categories with regards of the jobs
Those jobs often have matrix run strategy which runs several different variations of the jobs
(with different platform type / Python version to run for example)

### Google Cloud Platform Credentials

Some of the jobs require variables stored as [GitHub Secrets](https://docs.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets)
to perform operations on Google Cloud Platform.
These variables are:
* `GCP_PROJECT_ID` - ID of the Google Cloud project. For example: `apache-beam-testing`.
* `GCP_REGION` - Region of the bucket and dataflow jobs. For example: `us-central1`.
* `GCP_TESTING_BUCKET` - Name of the bucket where temporary files for Dataflow tests will be stored. For example: `beam-github-actions-tests`.
* `GCP_SA_EMAIL` - Service account email address. This is usually of the format `<name>@<project-id>.iam.gserviceaccount.com`.
* `GCP_SA_KEY` - Service account key. This key should be created and encoded as a Base64 string (e.g. `cat my-key.json | base64` on macOS).

The service account must have the following permissions ([IAM roles](https://cloud.google.com/iam/docs/understanding-roles)):
* Storage Admin (roles/storage.admin)
* Dataflow Admin (roles/dataflow.admin)

### Workflows

#### Build python source distribution and wheels - [build_wheels.yml](.github/workflows/build_wheels.yml)

| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials |
|-------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------|---------------|--------------------------|
| Check GCP variables | Checks that GCP variables are set. Jobs which require them depend on the output of this job. | Yes | Yes | Yes | Yes/No |
Expand All @@ -86,16 +105,15 @@ Those jobs often have matrix run strategy which runs several different variation
| List files on Google Cloud Storage Bucket | Lists files on GCS for verification purpose. | - | Yes | Yes | Yes |
| Tag repo nightly | Tag repo with `nightly-master` tag if build python source distribution and python wheels finished successfully. | - | - | Yes | - |

### Google Cloud Platform Credentials

Some of the jobs require variables stored as [GitHub Secrets](https://docs.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets)
to perform operations on Google Cloud Platform.
These variables are:
* `GCP_SA_EMAIL` - Service account email address. This is usually of the format `<name>@<project-id>.iam.gserviceaccount.com`.
* `GCP_SA_KEY` - Service account key. This key should be created and encoded as a Base64 string (eg. `cat my-key.json | base64` on macOS).
#### Python tests - [python_tests.yml](.github/workflows/python_tests.yml)

Service Account shall have following permissions ([IAM roles](https://cloud.google.com/iam/docs/understanding-roles)):
* Storage Object Admin (roles/storage.objectAdmin)
| Job | Description | Pull Request Run | Direct Push/Merge Run | Scheduled Run | Requires GCP Credentials |
|----------------------------------|-----------------------------------------------------------------------------------------------------------------------|------------------|-----------------------|---------------|--------------------------|
| Check GCP variables | Checks that GCP variables are set. Jobs which require them depend on the output of this job. | Yes | Yes | Yes | Yes/No |
| Build python source distribution | Builds python source distribution and uploads it to artifacts. Artifacts are used in `Python Wordcount Dataflow` job. | - | Yes | Yes | Yes |
| Python Unit Tests | Runs python unit tests. | Yes | Yes | Yes | - |
| Python Wordcount Direct Runner | Runs python WordCount example with Direct Runner. | Yes | Yes | Yes | - |
| Python Wordcount Dataflow | Runs python WordCount example with Dataflow Runner. | - | Yes | Yes | Yes |

### GitHub Action Tips

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
[![Compat Check PyPI](https://python-compatibility-tools.appspot.com/one_badge_image?package=apache-beam%5Bgcp%5D)](https://python-compatibility-tools.appspot.com/one_badge_target?package=apache-beam%5Bgcp%5D)
[![Compat Check at master](https://python-compatibility-tools.appspot.com/one_badge_image?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python)](https://python-compatibility-tools.appspot.com/one_badge_target?package=git%2Bgit%3A//github.com/apache/beam.git%23subdirectory%3Dsdks/python)
![Build python source distribution and wheels](https://github.com/apache/beam/workflows/Build%20python%20source%20distribution%20and%20wheels/badge.svg)
![Python tests](https://github.com/apache/beam/workflows/Python%20tests/badge.svg)

### Post-commit tests status (on master branch)

Expand Down
2 changes: 1 addition & 1 deletion scripts/ci/ci_check_are_gcp_variables_set.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function check_vars() {
$ret
}

if ! check_vars "GCP_SA_EMAIL" "GCP_SA_KEY"; then
if ! check_vars "GCP_PROJECT_ID" "GCP_REGION" "GCP_SA_EMAIL" "GCP_SA_KEY" "GCP_TESTING_BUCKET"; then
echo "::set-output name=gcp-variables-set::false"
echo >&2 "!!! WARNING !!!"
echo >&2 "Not all GCP variables are set. Jobs which require them will be skipped."
Expand Down
1 change: 1 addition & 0 deletions sdks/python/apache_beam/dataframe/pandas_doctests_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

@unittest.skipIf(sys.version_info <= (3, ), 'Requires contextlib.ExitStack.')
@unittest.skipIf(sys.version_info < (3, 6), 'Nondeterministic dict ordering.')
@unittest.skipIf(sys.platform == 'win32', '[BEAM-10626]')
class DoctestTest(unittest.TestCase):
def test_dataframe_tests(self):
result = doctests.testmod(
Expand Down
18 changes: 10 additions & 8 deletions sdks/python/apache_beam/io/parquetio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
from apache_beam.testing.util import equal_to
from apache_beam.transforms.display import DisplayData
from apache_beam.transforms.display_test import DisplayDataItemMatcher
# TODO(BEAM-8371): Use tempfile.TemporaryDirectory.
from apache_beam.utils.subprocess_server_test import TemporaryDirectory

try:
import pyarrow as pa
Expand Down Expand Up @@ -296,8 +298,8 @@ def test_sink_transform_int96(self):
path, self.SCHEMA96, num_shards=1, shard_name_template='')

def test_sink_transform(self):
with tempfile.NamedTemporaryFile() as dst:
path = dst.name
with TemporaryDirectory() as tmp_dirname:
  # Pass the file name as a separate component so the file is created inside
  # the temporary directory (and removed with it) instead of next to it.
  path = os.path.join(tmp_dirname, "tmp_filename")
with TestPipeline() as p:
_ = p \
| Create(self.RECORDS) \
Expand All @@ -312,8 +314,8 @@ def test_sink_transform(self):
assert_that(readback, equal_to([json.dumps(r) for r in self.RECORDS]))

def test_batched_read(self):
with tempfile.NamedTemporaryFile() as dst:
path = dst.name
with TemporaryDirectory() as tmp_dirname:
  # Pass the file name as a separate component so the file is created inside
  # the temporary directory (and removed with it) instead of next to it.
  path = os.path.join(tmp_dirname, "tmp_filename")
with TestPipeline() as p:
_ = p \
| Create(self.RECORDS, reshuffle=False) \
Expand All @@ -334,8 +336,8 @@ def test_batched_read(self):
param(compression_type='zstd')
])
def test_sink_transform_compressed(self, compression_type):
with tempfile.NamedTemporaryFile() as dst:
path = dst.name
with TemporaryDirectory() as tmp_dirname:
  # Pass the file name as a separate component so the file is created inside
  # the temporary directory (and removed with it) instead of next to it.
  path = os.path.join(tmp_dirname, "tmp_filename")
with TestPipeline() as p:
_ = p \
| Create(self.RECORDS) \
Expand Down Expand Up @@ -450,8 +452,8 @@ def test_selective_columns(self):
self._run_parquet_test(file_name, ['name'], None, False, expected_result)

def test_sink_transform_multiple_row_group(self):
with tempfile.NamedTemporaryFile() as dst:
path = dst.name
with TemporaryDirectory() as tmp_dirname:
  # Pass the file name as a separate component so the file is created inside
  # the temporary directory (and removed with it) instead of next to it.
  path = os.path.join(tmp_dirname, "tmp_filename")
with TestPipeline() as p:
# writing 623200 bytes of data
_ = p \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def test_watch_class_instance(self):
test_env.watch(self)
self.assertEqual(ie.current_env().watching(), test_env.watching())

@unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
def test_show_always_watch_given_pcolls(self):
p = beam.Pipeline(ir.InteractiveRunner())
# pylint: disable=range-builtin-not-iterating
Expand All @@ -96,6 +97,7 @@ def test_show_always_watch_given_pcolls(self):
ib.show(pcoll)
self.assertTrue(pcoll in _get_watched_pcollections_with_variable_names())

@unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
def test_show_mark_pcolls_computed_when_done(self):
p = beam.Pipeline(ir.InteractiveRunner())
# pylint: disable=range-builtin-not-iterating
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class InteractiveRunnerTest(unittest.TestCase):
def setUp(self):
ie.new_env()

@unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
def test_basic(self):
p = beam.Pipeline(
runner=interactive_runner.InteractiveRunner(
Expand All @@ -83,6 +84,7 @@ def test_basic(self):
_ = pc0 | 'Print3' >> beam.Map(print_with_message('Run3'))
p.run().wait_until_finish()

@unittest.skipIf(sys.platform == "win32", "[BEAM-10627]")
def test_wordcount(self):
class WordExtractingDoFn(beam.DoFn):
def process(self, element):
Expand Down
Loading

0 comments on commit c99390a

Please sign in to comment.