Commit

Modify run_e2etest script to check for job_types and include_dirs (kubeflow#180)

* Modified run_e2etest script

* Add more logs

* Use util to get changed files

* Add logs

* Change git diff command

* Fix broken tests

* Fix broken tests

* Testing with a fake prow_config

* Remove verbose logs

* Fix script error

* Revert prow_config

* Update README.md

* Change job_types and include_dirs to arrays

* Remove obsolete test

* Fix lint error

* Fix comments and argument parsing
richardsliu authored and k8s-ci-robot committed Jul 19, 2018
1 parent bfd4fb6 commit a61f638
Showing 3 changed files with 61 additions and 85 deletions.
17 changes: 16 additions & 1 deletion README.md
@@ -32,7 +32,7 @@ Quick Links

## Anatomy of our Tests

* Our prow jobs are defined in [config.yaml](https://github.com/kubernetes/test-infra/blob/master/prow/config.yaml)
* Our prow jobs are defined [here](https://github.com/kubernetes/test-infra/tree/master/config/jobs/kubeflow)
* Each prow job defines a K8s PodSpec indicating a command to run
* Our prow jobs use [run_e2e_workflow.py](https://github.com/kubeflow/testing/blob/master/py/kubeflow/testing/run_e2e_workflow.py)
to trigger an Argo workflow that checks out our code and runs our tests.
@@ -170,6 +170,21 @@ configured for the repository (see these [instructions](#prow-setup) for info on
1. Use [kubeflow.testing.run_e2e_workflow](https://github.com/kubeflow/testing/tree/master/py/kubeflow/testing/run_e2e_workflow.py)
to run the Argo workflow.
* Add a `prow_config.yaml` file that will be passed into run_e2e_workflow to determine which ksonnet app to use for testing. An example can be seen [here](https://github.com/kubeflow/kubeflow/blob/master/prow_config.yaml).
* Workflows can optionally be scoped by job type (presubmit/postsubmit) or modified directories. For example:

```
workflows:
- app_dir: kubeflow/testing/workflows
component: workflows
name: unittests
job_types:
- presubmit
include_dirs:
- foo/*
- bar/*
```
This configures the `unittests` workflow to run only during presubmit jobs, and only when files under the `foo` or `bar` directories have changed (see the sketch after this list).

1. Create a prow job for that repository
* The command for the prow job should be set via the entrypoint baked into the Docker image
* This way we can change the Prow job just by pushing a docker image and we don't need to update the prow config.
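
For illustration, here is a minimal Python sketch (not part of this commit) of the scoping rules described above; `run_e2e_workflow.py` applies the same checks, matching the patterns with `fnmatch` against the output of `git diff --name-only master`. The helper name and sample inputs here are made up.

```
import fnmatch

def should_trigger(job_types, include_dirs, job_type, changed_files):
  """Return True if a workflow scoped by job_types/include_dirs should run."""
  # An empty job_types list means the workflow runs for every job type.
  if job_types and job_type not in job_types:
    return False
  # An empty include_dirs list means the workflow always runs; otherwise at
  # least one changed file must match one of the glob patterns.
  if include_dirs:
    return any(fnmatch.fnmatch(f, d)
               for f in changed_files for d in include_dirs)
  return True

# The unittests workflow from the snippet above:
print(should_trigger(["presubmit"], ["foo/*", "bar/*"],
                     "presubmit", ["foo/some_file.py"]))   # True
print(should_trigger(["presubmit"], ["foo/*", "bar/*"],
                     "postsubmit", ["foo/some_file.py"]))  # False
```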
63 changes: 43 additions & 20 deletions py/kubeflow/testing/run_e2e_workflow.py
@@ -12,6 +12,10 @@
- name: e2e-test
app_dir: tensorflow/k8s/test/workflows
component: workflows
job_types:
presubmit
include_dirs:
tensorflow/*
- name: lint
app_dir: kubeflow/kubeflow/testing/workflows
@@ -23,13 +27,20 @@
component is the name of the ksonnet component corresponding
to the workflow to launch.
job_types (optional) is an array of strings representing the job types (presubmit/postsubmit)
that should run this workflow.
include_dirs (optional) is an array of strings that specify which directories, if modified,
should run this workflow.
The script expects that the directories
{repos_dir}/{app_dir} exists. Where repos_dir is provided
as a command line argument.
"""

import argparse
import datetime
import fnmatch
import logging
from kubernetes import client as k8s_client
import os
@@ -50,10 +61,12 @@ def get_namespace(args):
class WorkflowComponent(object):
"""Datastructure to represent a ksonnet component to submit a workflow."""

def __init__(self, name, app_dir, component, params):
def __init__(self, name, app_dir, component, job_types, include_dirs, params):
self.name = name
self.app_dir = app_dir
self.component = component
self.job_types = job_types
self.include_dirs = include_dirs
self.params = params

def _get_src_dir():
@@ -73,7 +86,8 @@ def parse_config_file(config_file, root_dir):
components = []
for i in results["workflows"]:
components.append(WorkflowComponent(
i["name"], os.path.join(root_dir, i["app_dir"]), i["component"], i.get("params", {})))
i["name"], os.path.join(root_dir, i["app_dir"]), i["component"], i.get("job_types", []),
i.get("include_dirs", []), i.get("params", {})))
return components

def generate_env_from_head(args):
@@ -97,15 +111,17 @@ def generate_env_from_head(args):
def run(args, file_handler): # pylint: disable=too-many-statements,too-many-branches
# Print ksonnet version
util.run(["ks", "version"])

# Compare with master branch and get changed files.
changed_files = util.run(["git", "diff", "--name-only", "master"],
cwd=os.path.join(args.repos_dir, os.getenv("REPO_OWNER"), os.getenv("REPO_NAME"))).splitlines()

if args.release:
generate_env_from_head(args)
workflows = []
if args.config_file:
workflows.extend(parse_config_file(args.config_file, args.repos_dir))

if args.app_dir and args.component:
# TODO(jlewi): We can get rid of this branch once all repos are using a prow_config.xml file.
workflows.append(WorkflowComponent("legacy", args.app_dir, args.component, {}))
create_started_file(args.bucket)

util.maybe_activate_service_account()
@@ -124,6 +140,28 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-branches
# as a label on the pods.
workflow_name = os.getenv("JOB_NAME") + "-" + w.name
job_type = os.getenv("JOB_TYPE")

# Skip this workflow if it is scoped to a different job type.
if w.job_types and not job_type in w.job_types:
logging.info("Skipping workflow %s.", w.name)
continue

# If we are scoping this workflow to specific directories, check if any files
# modified match the specified regex patterns.
dir_modified = False
if w.include_dirs:
for f in changed_files:
for d in w.include_dirs:
if fnmatch.fnmatch(f, d):
dir_modified = True
break
if dir_modified:
break

if w.include_dirs and not dir_modified:
logging.info("Skipping workflow %s.", w.name)
continue

if job_type == "presubmit":
workflow_name += "-{0}".format(os.getenv("PULL_NUMBER"))
workflow_name += "-{0}".format(os.getenv("PULL_PULL_SHA")[0:7])
@@ -264,21 +302,6 @@ def main(unparsed_args=None): # pylint: disable=too-many-locals
type=str,
help="The directory where the different repos are checked out.")

# TODO(jlewi): app_dir and component predate the use of a config
# file we should consider getting rid of them once all repos
# have been updated to run multiple workflows.
parser.add_argument(
"--app_dir",
type=str,
default="",
help="The directory where the ksonnet app is stored.")

parser.add_argument(
"--component",
type=str,
default="",
help="The ksonnet component to use.")

parser.add_argument(
"--release",
action='store_true',
66 changes: 2 additions & 64 deletions py/kubeflow/tests/run_e2e_workflow_test.py
@@ -23,69 +23,6 @@ def assertItemsMatchRegex(self, expected, actual):
pattern = "^" + e + "$"
six.assertRegex(self, actual[index], pattern)

@mock.patch("kubeflow.testing.run_e2e_workflow.prow_artifacts"
".finalize_prow_job")
@mock.patch("kubeflow.testing.run_e2e_workflow.util"
".maybe_activate_service_account")
@mock.patch("kubeflow.testing.run_e2e_workflow.util.upload_file_to_gcs")
@mock.patch("kubeflow.testing.run_e2e_workflow.util.upload_to_gcs")
@mock.patch("kubeflow.testing.run_e2e_workflow.util.load_kube_config")
@mock.patch("kubeflow.testing.run_e2e_workflow.argo_client.wait_for_workflows")
@mock.patch("kubeflow.testing.run_e2e_workflow.util.configure_kubectl")
@mock.patch("kubeflow.testing.run_e2e_workflow.util.run")
def testMainPresubmit(self, mock_run, mock_configure, *unused_mocks): # pylint: disable=no-self-use,unused-argument
"""Test create started for presubmit job."""

os.environ = {}
os.environ["REPO_OWNER"] = "fake_org"
os.environ["REPO_NAME"] = "fake_name"
os.environ["PULL_NUMBER"] = "77"
os.environ["PULL_PULL_SHA"] = "123abc"
os.environ["JOB_NAME"] = "kubeflow-presubmit"
os.environ["JOB_TYPE"] = "presubmit"
os.environ["BUILD_NUMBER"] = "1234"
os.environ["BUILD_ID"] = "11"

args = ["--project=some-project", "--cluster=some-cluster",
"--zone=us-east1-d", "--bucket=some-bucket",
"--app_dir=/some/dir",
"--component=workflows"]
run_e2e_workflow.main(args)

mock_configure.assert_called_once_with("some-project", "us-east1-d",
"some-cluster",)

expected_calls = [
["ks", "version"],
["ks", "env", "add", "kubeflow-presubmit-legacy-77-123abc-1234-.*"],
["ks", "param", "set", "--env=.*", "workflows", "name",
"kubeflow-presubmit-legacy-77-123abc-1234-[0-9a-z]{4}"],
["ks", "param", "set",
"--env=.*",
"workflows", "prow_env",
"BUILD_ID=11,BUILD_NUMBER=1234,JOB_NAME=kubeflow-presubmit,"
"JOB_TYPE=presubmit,PULL_NUMBER=77,PULL_PULL_SHA=123abc,"
"REPO_NAME=fake_name,REPO_OWNER=fake_org"],
["ks", "param", "set",
"--env=.*",
"workflows", "namespace",
"kubeflow-test-infra"],
["ks", "param", "set",
"--env=.*",
"workflows", "bucket", "some-bucket"],
["ks", "show", "kubeflow-presubmit.*", "-c", "workflows"],
["ks", "apply", "kubeflow-presubmit.*", "-c", "workflows"],
]

for i, expected in enumerate(expected_calls):
self.assertItemsMatchRegex(
expected,
mock_run.call_args_list[i][0][0])
if i > 0:
self.assertEqual(
"/some/dir",
mock_run.call_args_list[i][1]["cwd"])

@mock.patch("kubeflow.testing.run_e2e_workflow.prow_artifacts"
".finalize_prow_job")
@mock.patch("kubeflow.testing.run_e2e_workflow.util"
@@ -134,6 +71,7 @@ def testWithConfig(self, mock_run, mock_configure, *unused_mocks): # pylint: di

expected_calls = [
["ks", "version"],
["git", "diff", "--name-only", "master"],
["ks", "env", "add", "kubeflow-presubmit-wf-77-123abc-1234-.*"],
["ks", "param", "set", "--env=.*", "workflows", "name",
"kubeflow-presubmit-wf-77-123abc-1234-[0-9a-z]{4}"],
@@ -164,7 +102,7 @@ def testWithConfig(self, mock_run, mock_configure, *unused_mocks): # pylint: di
self.assertItemsMatchRegex(
expected,
mock_run.call_args_list[i][0][0])
if i > 0:
if i > 1:
self.assertEqual(
"/src/kubeflow/testing/workflows",
mock_run.call_args_list[i][1]["cwd"])
