diff --git a/Makefile b/Makefile
index 232244a9..95f7bab4 100644
--- a/Makefile
+++ b/Makefile
@@ -5,24 +5,27 @@ else
 BUILD_ENV?=$(shell hostname -f)
 endif
 
+.PHONY: venv
 venv:
 	tox -e venv
 
+.PHONY: test
 test:
 	tox
 
+.PHONY: tox_%
 tox_%:
 	tox -e $*
 
-itest:
-	tox -e integration
-
+.PHONY: docs
 docs:
 	tox -e docs
 
+.PHONY: pypi
 pypi:
 	tox -e pypi
 
+.PHONY: clean
 clean:
 	rm -rf docs/build
 	find . -name '*.pyc' -delete
diff --git a/README.md b/README.md
index 6232a00c..d950bae4 100644
--- a/README.md
+++ b/README.md
@@ -6,47 +6,9 @@ Interfaces and shared infrastructure for generic task processing (also known as
 ### Pre-requisites
 
-+ [Docker](https://www.docker.com/get-docker)
 + [Python 3.8](https://www.python.org/downloads/)
 + [Virtualenv](https://virtualenv.pypa.io/en/stable/installation/)
 
-### Running examples
-
-[hello-world.py](/examples/hello-world.py) is a very simple annotated example that launches a task to echo `hello world`. From the root of the repository, run:
-
-    docker-compose -f examples/cluster/docker-compose.yaml \
-        run playground examples/hello-world.py
-
-This will bring up a single master, single agent Mesos cluster using [Docker Compose](https://docs.docker.com/compose/) and launch a single task which will print "hello world" to the sandbox's stdout before terminating.
-
-Other examples available include:
-+ async.py
-Example of the [async](#async) task runner.
-
-+ dynamo_persistence.py
-Example that shows how task events may be persisted to [DynamoDB](https://aws.amazon.com/dynamodb) using the `stateful` plugin.
-
-+ file_persistence.py
-Example that shows how task events may be persisted to disk using the `stateful` plugin.
-
-+ promise.py
-Example that shows how the [promise/future](#Promise/Future) task runner (not yet implemented) may be used.
-
-+ subscription.py
-Example of the [subscription](#subscription) task runner.
-
-+ sync.py
-Brief example using the [sync](#sync) task runner.
-
-+ timeout.py
-Example that shows how to timeout a task execution using the `timeout` plugin.
-
-+ retry.py
-Example that shows how to retry a task on failure using the `retry` plugin.
-
-+ task_logging.py
-Example that shows how to fetch task logs from Mesos agents using the `logging` plugin.
-
 ### Running tests
 
 From the root of the repository, run:
@@ -65,30 +27,10 @@ From the root of the repository, run:
 
 ### /plugins
 
-Plugins can be chained to create a task execution pipeline with more than one property. Please refer to persistence/retry/timeout examples.
-
-#### mesos
-Implements all required interfaces to talk to Mesos deployment. This plugin uses [PyMesos](https://github.com/douban/pymesos) to communicate with Mesos.
-
-#### timeout
-Implements an executor to timeout task execution.
-
-#### retrying
-Implements an executor to retry task execution upon failure.
-
-#### logging
-Implements an executor to retrieve task logs from Mesos agents. Note that it has to be the immediate upstream executor of the mesos executor.
-
-##### Configuration options
-
-- authentication\_principal Mesos principal
-- credential\_secret\_file path to file containing Mesos secret
-- mesos\_address host:port to connect to Mesos cluster
-- event_translator a function that maps Mesos-specific events to `Event` objects
-
-#### stateful
+Plugins can be chained to create a task execution pipeline that combines more than one behavior.
 
-TODO: documentation
+#### Kubernetes
+Implements all required interfaces to talk to Kubernetes.
This plugin uses [kubernetes-client](https://github.com/kubernetes-client/python) to communicate with Kubernetes. ### /runners diff --git a/docs/source/generated/task_processing.interfaces.event.rst b/docs/source/generated/task_processing.interfaces.event.rst index 7ae80859..b8a2cca0 100644 --- a/docs/source/generated/task_processing.interfaces.event.rst +++ b/docs/source/generated/task_processing.interfaces.event.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.event module -========================================== +task\_processing.interfaces.event module +======================================== .. automodule:: task_processing.interfaces.event - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.persistence.rst b/docs/source/generated/task_processing.interfaces.persistence.rst new file mode 100644 index 00000000..3c457a0d --- /dev/null +++ b/docs/source/generated/task_processing.interfaces.persistence.rst @@ -0,0 +1,7 @@ +task\_processing.interfaces.persistence module +============================================== + +.. automodule:: task_processing.interfaces.persistence + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.rst b/docs/source/generated/task_processing.interfaces.rst index 72c42e29..1a06f845 100644 --- a/docs/source/generated/task_processing.interfaces.rst +++ b/docs/source/generated/task_processing.interfaces.rst @@ -1,12 +1,14 @@ -task\_processing\.interfaces package -==================================== +task\_processing.interfaces package +=================================== Submodules ---------- .. toctree:: + :maxdepth: 4 task_processing.interfaces.event + task_processing.interfaces.persistence task_processing.interfaces.runner task_processing.interfaces.task_executor @@ -14,6 +16,6 @@ Module contents --------------- .. automodule:: task_processing.interfaces - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.runner.rst b/docs/source/generated/task_processing.interfaces.runner.rst index 2cf1789b..7efc8ade 100644 --- a/docs/source/generated/task_processing.interfaces.runner.rst +++ b/docs/source/generated/task_processing.interfaces.runner.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.runner module -=========================================== +task\_processing.interfaces.runner module +========================================= .. automodule:: task_processing.interfaces.runner - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.interfaces.task_executor.rst b/docs/source/generated/task_processing.interfaces.task_executor.rst index b2a89cab..7fd3b3d6 100644 --- a/docs/source/generated/task_processing.interfaces.task_executor.rst +++ b/docs/source/generated/task_processing.interfaces.task_executor.rst @@ -1,7 +1,7 @@ -task\_processing\.interfaces\.task\_executor module -=================================================== +task\_processing.interfaces.task\_executor module +================================================= .. 
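Since this change deletes the runnable Mesos examples, a minimal sketch of driving the new kubernetes plugin through the generic `TaskProcessor` API may help fill the gap. The `load_plugin`/`executor_from_config`/`Sync` flow below mirrors the removed `examples/hello-world.py`; the `"kubernetes"` provider name, the `command` field, and the `provider_config` keys shown are assumptions to verify against `kubernetes_pod_executor`, not facts confirmed by this diff.

```python
#!/usr/bin/env python3
"""Hedged sketch: launch one task via the new kubernetes plugin.

The runner/processor API mirrors the deleted examples/hello-world.py;
the provider name and provider_config keys below are assumptions.
"""
from task_processing.runners.sync import Sync
from task_processing.task_processor import TaskProcessor


def main():
    processor = TaskProcessor()
    # Plugin module path taken from the new RST docs added in this diff.
    processor.load_plugin(provider_module="task_processing.plugins.kubernetes")

    executor = processor.executor_from_config(
        provider="kubernetes",  # assumed provider name
        provider_config={
            "namespace": "taskproc",              # hypothetical namespace
            "kubeconfig_path": "~/.kube/config",  # hypothetical path
        },
    )

    # Same TASK_CONFIG_INTERFACE pattern the deleted Mesos examples used;
    # the exact field names (image/command) may differ for pods.
    TaskConfig = executor.TASK_CONFIG_INTERFACE
    task_config = TaskConfig(image="busybox", command='echo "hello world"')

    # Sync blocks until the task reaches a terminal state.
    runner = Sync(executor)
    result = runner.run(task_config)
    print(result)

    runner.stop()
    return 0 if result.success else 1


if __name__ == "__main__":
    exit(main())
```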
automodule:: task_processing.interfaces.task_executor - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.metrics.rst b/docs/source/generated/task_processing.metrics.rst new file mode 100644 index 00000000..002b6d8e --- /dev/null +++ b/docs/source/generated/task_processing.metrics.rst @@ -0,0 +1,7 @@ +task\_processing.metrics module +=============================== + +.. automodule:: task_processing.metrics + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst b/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst new file mode 100644 index 00000000..c130a5b1 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.kube_client.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.kube\_client module +======================================================= + +.. automodule:: task_processing.plugins.kubernetes.kube_client + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst b/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst new file mode 100644 index 00000000..06fafecf --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.kubernetes_pod_executor.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.kubernetes\_pod\_executor module +==================================================================== + +.. automodule:: task_processing.plugins.kubernetes.kubernetes_pod_executor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.rst b/docs/source/generated/task_processing.plugins.kubernetes.rst new file mode 100644 index 00000000..2f0fa689 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.rst @@ -0,0 +1,23 @@ +task\_processing.plugins.kubernetes package +=========================================== + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + task_processing.plugins.kubernetes.kube_client + task_processing.plugins.kubernetes.kubernetes_pod_executor + task_processing.plugins.kubernetes.task_config + task_processing.plugins.kubernetes.task_metadata + task_processing.plugins.kubernetes.types + task_processing.plugins.kubernetes.utils + +Module contents +--------------- + +.. automodule:: task_processing.plugins.kubernetes + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst b/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst new file mode 100644 index 00000000..29546886 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.task_config.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.task\_config module +======================================================= + +.. 
automodule:: task_processing.plugins.kubernetes.task_config + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst b/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst new file mode 100644 index 00000000..e8d1845e --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.task_metadata.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.task\_metadata module +========================================================= + +.. automodule:: task_processing.plugins.kubernetes.task_metadata + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.types.rst b/docs/source/generated/task_processing.plugins.kubernetes.types.rst new file mode 100644 index 00000000..5c5e0424 --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.types.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.types module +================================================ + +.. automodule:: task_processing.plugins.kubernetes.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.kubernetes.utils.rst b/docs/source/generated/task_processing.plugins.kubernetes.utils.rst new file mode 100644 index 00000000..5e41a30e --- /dev/null +++ b/docs/source/generated/task_processing.plugins.kubernetes.utils.rst @@ -0,0 +1,7 @@ +task\_processing.plugins.kubernetes.utils module +================================================ + +.. automodule:: task_processing.plugins.kubernetes.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst b/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst deleted file mode 100644 index cea63183..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.execution_framework.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.execution\_framework module -============================================================= - -.. automodule:: task_processing.plugins.mesos.execution_framework - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst b/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst deleted file mode 100644 index ba3a3f64..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.mesos_executor.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.mesos\_executor module -======================================================== - -.. automodule:: task_processing.plugins.mesos.mesos_executor - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.rst b/docs/source/generated/task_processing.plugins.mesos.rst deleted file mode 100644 index 8b8370aa..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.rst +++ /dev/null @@ -1,19 +0,0 @@ -task\_processing\.plugins\.mesos package -======================================== - -Submodules ----------- - -.. toctree:: - - task_processing.plugins.mesos.execution_framework - task_processing.plugins.mesos.mesos_executor - task_processing.plugins.mesos.translator - -Module contents ---------------- - -.. 
automodule:: task_processing.plugins.mesos - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.mesos.translator.rst b/docs/source/generated/task_processing.plugins.mesos.translator.rst deleted file mode 100644 index 2cb13c96..00000000 --- a/docs/source/generated/task_processing.plugins.mesos.translator.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.plugins\.mesos\.translator module -=================================================== - -.. automodule:: task_processing.plugins.mesos.translator - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.plugins.rst b/docs/source/generated/task_processing.plugins.rst index 425a912d..06f389b0 100644 --- a/docs/source/generated/task_processing.plugins.rst +++ b/docs/source/generated/task_processing.plugins.rst @@ -1,17 +1,18 @@ -task\_processing\.plugins package -================================= +task\_processing.plugins package +================================ Subpackages ----------- .. toctree:: + :maxdepth: 4 - task_processing.plugins.mesos + task_processing.plugins.kubernetes Module contents --------------- .. automodule:: task_processing.plugins - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.rst b/docs/source/generated/task_processing.rst index a250279d..763333c7 100644 --- a/docs/source/generated/task_processing.rst +++ b/docs/source/generated/task_processing.rst @@ -5,15 +5,26 @@ Subpackages ----------- .. toctree:: + :maxdepth: 4 - task_processing.interfaces - task_processing.plugins - task_processing.runners + task_processing.interfaces + task_processing.plugins + task_processing.runners + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + task_processing.metrics + task_processing.task_processor + task_processing.utils Module contents --------------- .. automodule:: task_processing - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.async.rst b/docs/source/generated/task_processing.runners.async.rst deleted file mode 100644 index 71993f5d..00000000 --- a/docs/source/generated/task_processing.runners.async.rst +++ /dev/null @@ -1,7 +0,0 @@ -task\_processing\.runners\.async module -======================================= - -.. automodule:: task_processing.runners.async - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.async_runner.rst b/docs/source/generated/task_processing.runners.async_runner.rst new file mode 100644 index 00000000..480156a3 --- /dev/null +++ b/docs/source/generated/task_processing.runners.async_runner.rst @@ -0,0 +1,7 @@ +task\_processing.runners.async\_runner module +============================================= + +.. automodule:: task_processing.runners.async_runner + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.promise.rst b/docs/source/generated/task_processing.runners.promise.rst index 6f5be7a4..a8f8ff59 100644 --- a/docs/source/generated/task_processing.runners.promise.rst +++ b/docs/source/generated/task_processing.runners.promise.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.promise module -========================================= +task\_processing.runners.promise module +======================================= .. 
automodule:: task_processing.runners.promise - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.rst b/docs/source/generated/task_processing.runners.rst index a840d97b..7e18893d 100644 --- a/docs/source/generated/task_processing.runners.rst +++ b/docs/source/generated/task_processing.runners.rst @@ -1,12 +1,13 @@ -task\_processing\.runners package -================================= +task\_processing.runners package +================================ Submodules ---------- .. toctree:: + :maxdepth: 4 - task_processing.runners.async + task_processing.runners.async_runner task_processing.runners.promise task_processing.runners.subscription task_processing.runners.sync @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: task_processing.runners - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.subscription.rst b/docs/source/generated/task_processing.runners.subscription.rst index 81c088cf..bd0e7f61 100644 --- a/docs/source/generated/task_processing.runners.subscription.rst +++ b/docs/source/generated/task_processing.runners.subscription.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.subscription module -============================================== +task\_processing.runners.subscription module +============================================ .. automodule:: task_processing.runners.subscription - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.runners.sync.rst b/docs/source/generated/task_processing.runners.sync.rst index 450da322..99116f7c 100644 --- a/docs/source/generated/task_processing.runners.sync.rst +++ b/docs/source/generated/task_processing.runners.sync.rst @@ -1,7 +1,7 @@ -task\_processing\.runners\.sync module -====================================== +task\_processing.runners.sync module +==================================== .. automodule:: task_processing.runners.sync - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.task_processor.rst b/docs/source/generated/task_processing.task_processor.rst new file mode 100644 index 00000000..aabda2f7 --- /dev/null +++ b/docs/source/generated/task_processing.task_processor.rst @@ -0,0 +1,7 @@ +task\_processing.task\_processor module +======================================= + +.. automodule:: task_processing.task_processor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/generated/task_processing.utils.rst b/docs/source/generated/task_processing.utils.rst new file mode 100644 index 00000000..2253f56b --- /dev/null +++ b/docs/source/generated/task_processing.utils.rst @@ -0,0 +1,7 @@ +task\_processing.utils module +============================= + +.. 
automodule:: task_processing.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/__init__.py b/examples/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/examples/async.py b/examples/async.py deleted file mode 100755 index 39b21a1e..00000000 --- a/examples/async.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -import logging -import time - -from common import parse_args - -from task_processing.runners.async_runner import Async -from task_processing.runners.async_runner import EventHandler -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -class Counter(object): - def __init__(self): - self.terminated = 0 - - def process_event(self, event): - self.terminated += 1 - - -def main(): - args = parse_args() - - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "role": args.role, - }, - ) - - counter = Counter() - runner = Async( - executor, - [EventHandler(predicate=lambda x: x.terminal, cb=counter.process_event)], - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - tasks_to_launch = 2 - for _ in range(tasks_to_launch): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - runner.run(task_config) - - for _ in range(5): - print("terminated {} tasks".format(counter.terminated)) - if counter.terminated >= tasks_to_launch: - break - time.sleep(2) - - runner.stop() - return 0 if counter.terminated >= tasks_to_launch else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/cluster/docker-compose.yaml b/examples/cluster/docker-compose.yaml deleted file mode 100644 index dccecfaa..00000000 --- a/examples/cluster/docker-compose.yaml +++ /dev/null @@ -1,51 +0,0 @@ -version: "2" - -services: - zookeeper: - image: zookeeper - environment: - ZK_CONFIG: tickTime=2000,initLimit=10,syncLimit=5,maxClientCnxns=128,forceSync=no,clientPort=2181 - ZK_ID: 1 - mesosmaster: - image: mesosphere/mesos:1.3.0 - ports: - - 5050 - - 5054 - command: 'mesos-master --zk=zk://zookeeper:2181/mesos-testcluster --registry=in_memory --quorum=1 --authenticate --authenticate_agents --work_dir=/tmp/mesos --credentials=/etc/mesos-secrets' - depends_on: - - zookeeper - volumes: - - ./mesos-secrets:/etc/mesos-secrets - mesosagent: - image: mesosphere/mesos:1.3.0 - expose: - - 5051 - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ./mesos-agent-secret:/etc/mesos-agent-secret - environment: - CLUSTER: testcluster - command: 'mesos-agent --master=zk://zookeeper:2181/mesos-testcluster --resources="cpus:20;mem:2048;disk:2000;ports:[31000-31100];cpus(taskproc):10;mem(taskproc):1024;disk(taskproc):1000;ports(taskproc):[31200-31500]" --credential=/etc/mesos-agent-secret --containerizers=docker --docker=/usr/bin/docker --work_dir=/tmp/mesos --attributes="region:fakeregion;pool:default" --no-docker_kill_orphans --log_dir=/var/log/mesos' - depends_on: - - mesosmaster - - zookeeper - playground: - build: - context: ../.. 
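The `examples/async.py` removed above showed the `Async` runner reacting to terminal events through an `EventHandler`; that runner outlives this change (a `task_processing.runners.async_runner` docs page is added). The sketch below keeps the deleted example's structure and only swaps the executor; the kubernetes provider name and config key remain assumptions.

```python
#!/usr/bin/env python3
"""Hedged sketch: the deleted examples/async.py pattern without Mesos.

Async/EventHandler usage is copied from the removed example; the
"kubernetes" provider name and its config key are assumptions.
"""
import time

from task_processing.runners.async_runner import Async
from task_processing.runners.async_runner import EventHandler
from task_processing.task_processor import TaskProcessor


class Counter:
    """Counts terminal task events, as the deleted example did."""

    def __init__(self):
        self.terminated = 0

    def process_event(self, event):
        self.terminated += 1


def main():
    processor = TaskProcessor()
    processor.load_plugin(provider_module="task_processing.plugins.kubernetes")
    executor = processor.executor_from_config(
        provider="kubernetes",                      # assumed provider name
        provider_config={"namespace": "taskproc"},  # hypothetical key
    )

    counter = Counter()
    # Invoke the callback once per terminal event, exactly as async.py did.
    runner = Async(
        executor,
        [EventHandler(predicate=lambda ev: ev.terminal, cb=counter.process_event)],
    )

    TaskConfig = executor.TASK_CONFIG_INTERFACE
    tasks_to_launch = 2
    for _ in range(tasks_to_launch):
        runner.run(TaskConfig(image="busybox", command="/bin/true"))

    # Poll briefly for completion instead of blocking forever.
    for _ in range(5):
        if counter.terminated >= tasks_to_launch:
            break
        time.sleep(2)

    runner.stop()
    return 0 if counter.terminated >= tasks_to_launch else 1


if __name__ == "__main__":
    exit(main())
```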
- dockerfile: ./examples/cluster/playground/Dockerfile - args: - PIP_INDEX_URL: ${PIP_INDEX_URL} - environment: - MESOS: mesosmaster:5050 - DYNAMO: http://dynamodb:8000 - depends_on: - - zookeeper - - mesosmaster - - mesosagent - - dynamodb - volumes: - - /var/run/docker.sock:/var/run/docker.sock - dynamodb: - image: deangiberson/aws-dynamodb-local - ports: - - '8000:8000' diff --git a/examples/cluster/mesos-agent-secret b/examples/cluster/mesos-agent-secret deleted file mode 100644 index c666e111..00000000 --- a/examples/cluster/mesos-agent-secret +++ /dev/null @@ -1,4 +0,0 @@ - { - "principal": "agent", - "secret": "secretagent" - } diff --git a/examples/cluster/mesos-secrets b/examples/cluster/mesos-secrets deleted file mode 100644 index 7d7663a6..00000000 --- a/examples/cluster/mesos-secrets +++ /dev/null @@ -1,12 +0,0 @@ -{ - "credentials": [ - { - "principal": "taskproc", - "secret": "secret" - }, - { - "principal": "agent", - "secret": "secretagent" - } - ] -} diff --git a/examples/cluster/playground/Dockerfile b/examples/cluster/playground/Dockerfile deleted file mode 100644 index d41295a7..00000000 --- a/examples/cluster/playground/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM ubuntu:jammy - -RUN apt-get update -yq && \ - apt-get install -yq \ - # needed to add a ppa - software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa - -RUN apt-get update -q && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - software-properties-common \ - debhelper dpkg-dev gcc gdebi-core git help2man libffi-dev \ - dh-virtualenv \ - libssl-dev libsasl2-modules libyaml-dev pyflakes3 python3.8-dev python3.8-distutils python3-pip python3-pytest python3-http-parser\ - tox python3-yaml wget zip zsh \ - openssh-server docker.io curl vim jq libsvn-dev \ - && apt-get clean - -ARG PIP_INDEX_URL -ENV PIP_INDEX_URL=${PIP_INDEX_URL:-https://pypi.python.org/simple} -RUN pip3 install --index-url ${PIP_INDEX_URL} virtualenv==16.7.5 - -RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd -RUN mkdir /var/run/sshd - -ADD . /src -ENV PYTHONPATH=/src -WORKDIR /src - -# temporarily downpin cryptography until we can make it grab the correct pre-built wheel in itests -RUN pip3 install . 
-RUN pip3 install -r requirements-dev.txt -RUN pip3 install pymesos - -CMD /bin/bash diff --git a/examples/cluster/secret b/examples/cluster/secret deleted file mode 100644 index d97c5ead..00000000 --- a/examples/cluster/secret +++ /dev/null @@ -1 +0,0 @@ -secret diff --git a/examples/common.py b/examples/common.py deleted file mode 100644 index 2df2849c..00000000 --- a/examples/common.py +++ /dev/null @@ -1,38 +0,0 @@ -import argparse -import os - - -def parse_args(): - parser = argparse.ArgumentParser(description="Runs a task processing task") - - parser.add_argument( - "-m", - "--master", - dest="master", - default=os.environ.get("MESOS", "127.0.0.1:5050"), - help="mesos master address", - ) - - parser.add_argument("-p", "--pool", dest="pool", help="mesos resource pool to use") - - parser.add_argument( - "-r", - "--role", - dest="role", - default="taskproc", - help="mesos reservation role to use", - ) - - with open("./examples/cluster/secret") as f: - default_secret = f.read().strip() - - parser.add_argument( - "-s", - "--secret", - dest="secret", - default=default_secret, - help="mesos secret to use", - ) - - args = parser.parse_args() - return args diff --git a/examples/dynamo_persistence.py b/examples/dynamo_persistence.py deleted file mode 100755 index cb28b045..00000000 --- a/examples/dynamo_persistence.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from boto3 import session -from botocore.errorfactory import ClientError - -from task_processing.plugins.persistence.dynamodb_persistence import DynamoDBPersister -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - for p in ["mesos", "stateful"]: - processor.load_plugin(provider_module="task_processing.plugins." 
+ p) - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - s = session.Session( - region_name="foo", aws_access_key_id="foo", aws_secret_access_key="bar" - ) - dynamo_address = os.getenv("DYNAMO", "http://dynamodb:5050") - client = s.client( - service_name="dynamodb", - endpoint_url=dynamo_address, - ) - try: - create_table(client) - except ClientError: - pass - - executor = processor.executor_from_config( - provider="stateful", - provider_config={ - "downstream_executor": mesos_executor, - "persister": DynamoDBPersister( - table_name="events", endpoint_url=dynamo_address, session=s - ), - }, - ) - runner = Sync(executor=executor) - tasks = set() - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - for _ in range(1, 2): - task_config = TaskConfig(image="ubuntu:14.04", cmd="/bin/sleep 2") - tasks.add(task_config.task_id) - runner.run(task_config) - print(executor.status(task_config.task_id)) - - -def create_table(client): - return client.create_table( - TableName="events", - KeySchema=[ - {"AttributeName": "task_id", "KeyType": "HASH"}, - {"AttributeName": "timestamp", "KeyType": "RANGE"}, - ], - AttributeDefinitions=[ - {"AttributeName": "task_id", "AttributeType": "S"}, - {"AttributeName": "timestamp", "AttributeType": "N"}, - ], - ProvisionedThroughput={"ReadCapacityUnits": 123, "WriteCapacityUnits": 123}, - ) - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/file_persistence.py b/examples/file_persistence.py deleted file mode 100755 index 9606645c..00000000 --- a/examples/file_persistence.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from task_processing.plugins.persistence.file_persistence import FilePersistence -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - for p in ["mesos", "stateful"]: - processor.load_plugin(provider_module="task_processing.plugins." 
+ p) - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - executor = processor.executor_from_config( - provider="stateful", - provider_config={ - "downstream_executor": mesos_executor, - "persister": FilePersistence(output_file="/tmp/foo"), - }, - ) - - runner = Sync(executor=executor) - tasks = set() - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - for _ in range(1, 2): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - tasks.add(task_config.task_id) - runner.run(task_config) - print(executor.status(task_config.task_id)) - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/hello-world.py b/examples/hello-world.py deleted file mode 100755 index 0f0a6b2d..00000000 --- a/examples/hello-world.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -import os - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -"""Simple hello-world example of how to use Task Processing (taskproc) -""" - - -def main(): - # get address of the Mesos cluster - mesos_address = os.getenv("MESOS", "mesosmaster:5050") - - # read in secret, this is used to authenticate the taskproc scheduler with - # Mesos - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - # create a processor instance - processor = TaskProcessor() - - # configure plugins - processor.load_plugin(provider_module="task_processing.plugins.mesos") - - # create an executor (taskproc executor NOT to be confused with a Mesos - # executor) using this defined configuration. this config can also be used - # to specify other Mesos properties, such as which role to use - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - # creates a new Sync runner that will synchronously execute tasks - # (i.e. 
block until completion) - runner = Sync(executor) - - # next, create a TaskConfig to run - # this is where properties of the Mesos task can be specified in this - # example, we use the busybox Docker image and just echo "hello world" - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd='echo "hello world"') - - # run our task and print the result - result = runner.run(task_config) - print(result) - - # this stops the taskproc framework and unregisters it from Mesos - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/offer_timeout.py b/examples/offer_timeout.py deleted file mode 100755 index 77f04898..00000000 --- a/examples/offer_timeout.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -import logging -import time - -from common import parse_args - -from task_processing.runners.async_runner import Async -from task_processing.runners.async_runner import EventHandler -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -class Counter(object): - def __init__(self): - self.terminated = 0 - - def process_event(self, event): - print("task %s finished" % (event.task_id)) - self.terminated += 1 - - -def main(): - c = Counter() - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Async( - mesos_executor, - [ - EventHandler( - predicate=lambda x: x.terminal, - cb=c.process_event, - ) - ], - ) - timeout_task_config = TaskConfig( - image="busybox", - cmd="exec /bin/sleep 100", - offer_timeout=5.0, - cpus=20, - mem=2048, - disk=2000, - ) - runner.run(timeout_task_config) - - for _ in range(50): - if c.terminated >= 1: - break - print("waiting for task %s to finish" % (timeout_task_config.task_id)) - time.sleep(2) - - runner.stop() - return 0 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/promise.py b/examples/promise.py deleted file mode 100755 index 053ff0bf..00000000 --- a/examples/promise.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -import logging -from concurrent.futures import ThreadPoolExecutor -from concurrent.futures import wait - -from common import parse_args - -from task_processing.runners.promise import Promise -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd="/bin/true") - # This only works on agents that have added mesos as a containerizer - # task_config = TaskConfig(containerizer='MESOS', cmd='/bin/true') - - with ThreadPoolExecutor(max_workers=2) as 
futures_executor: - runner = Promise(executor, futures_executor) - future = runner.run(task_config) - wait([future]) - result = future.result() - print(result) - print(result.raw) - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/retry.py b/examples/retry.py deleted file mode 100755 index 29f6642a..00000000 --- a/examples/retry.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="retrying", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="docker-dev.yelpcorp.com/dumb-busybox", cmd="/bin/false", retries=2 - ) - result = runner.run(task_config) - print(result) - - runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/subscription.py b/examples/subscription.py deleted file mode 100755 index 62247277..00000000 --- a/examples/subscription.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python3 -import logging -import os - -from six.moves.queue import Empty -from six.moves.queue import Queue - -from task_processing.runners.subscription import Subscription -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - mesos_address = os.environ["MESOS"] - with open("./examples/cluster/secret") as f: - secret = f.read().strip() - - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": secret, - "mesos_address": mesos_address, - "role": "taskproc", - }, - ) - - queue = Queue(100) - runner = Subscription(executor, queue) - - tasks = set() - TaskConfig = executor.TASK_CONFIG_INTERFACE - for _ in range(2): - task_config = TaskConfig(image="busybox", cmd="/bin/true") - tasks.add(task_config.task_id) - runner.run(task_config) - - print("Running {} tasks: {}".format(len(tasks), tasks)) - while len(tasks) > 0: - try: - event = queue.get(block=True, timeout=10) - except Empty: - event = None - - if event is None: - print("Timeout while waiting for {}".format(tasks)) - break - else: - if event.terminal: - tasks.discard(event.task_id) - - runner.stop() - return 0 if len(tasks) == 0 else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/sync.py b/examples/sync.py deleted file mode 100755 index aa290356..00000000 --- a/examples/sync.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor 
import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - TaskConfig = executor.TASK_CONFIG_INTERFACE - task_config = TaskConfig(image="busybox", cmd="/bin/true") - # This only works on agents that have added mesos as a containerizer - # task_config = TaskConfig(containerizer='MESOS', cmd='/bin/true') - - runner = Sync(executor) - result = runner.run(task_config) - print(result) - print(result.raw) - runner.stop() - - return 0 if result.success else 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/task_logging.py b/examples/task_logging.py deleted file mode 100755 index 6bff6ad1..00000000 --- a/examples/task_logging.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="logging", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="ubuntu:14.04", - cmd="bash -c 'for i in $(seq 1 5); do echo $i&&sleep 10; done'", - ) - result = runner.run(task_config) - print(result) - - runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/examples/timeout.py b/examples/timeout.py deleted file mode 100755 index f1d5a603..00000000 --- a/examples/timeout.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import logging - -from common import parse_args - -from task_processing.runners.sync import Sync -from task_processing.task_processor import TaskProcessor - -FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s" -LEVEL = logging.DEBUG -logging.basicConfig(format=FORMAT, level=LEVEL) - - -def main(): - args = parse_args() - processor = TaskProcessor() - processor.load_plugin(provider_module="task_processing.plugins.mesos") - mesos_executor = processor.executor_from_config( - provider="mesos_task", - provider_config={ - "secret": args.secret, - "mesos_address": args.master, - "pool": args.pool, - "role": args.role, - }, - ) - - executor = processor.executor_from_config( - provider="timeout", - provider_config={ - "downstream_executor": mesos_executor, - }, - ) - - TaskConfig = mesos_executor.TASK_CONFIG_INTERFACE - runner = Sync(executor=executor) - task_config = TaskConfig( - image="docker-dev.yelpcorp.com/dumb-busybox", - cmd="exec dumb-init /bin/sleep 30", - timeout=10, - ) - result = runner.run(task_config) - print(result) - - 
runner.stop() - - -if __name__ == "__main__": - exit(main()) diff --git a/itest b/itest deleted file mode 100755 index 8957424e..00000000 --- a/itest +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -eux - -examples/async.py -examples/file_persistence.py -examples/hello-world.py -examples/subscription.py -examples/sync.py -examples/retry.py -examples/timeout.py - -# TODO: These should probably also be run eventually: -# examples/promise.py -# examples/dynamo_persistence.py diff --git a/setup.py b/setup.py index cfd095b8..7158d454 100644 --- a/setup.py +++ b/setup.py @@ -39,8 +39,6 @@ "pyrsistent", ], extras_require={ - # We can add the Mesos specific dependencies here - "mesos_executor": ["addict", "pymesos>=0.2.14", "requests"], "metrics": ["yelp-meteorite"], "persistence": ["boto3"], "k8s": ["kubernetes", "typing-extensions"], diff --git a/task_processing/plugins/mesos/__init__.py b/task_processing/plugins/mesos/__init__.py deleted file mode 100644 index 5f546fe5..00000000 --- a/task_processing/plugins/mesos/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .logging_executor import MesosLoggingExecutor -from .mesos_pod_executor import MesosPodExecutor -from .mesos_task_executor import MesosTaskExecutor -from .retrying_executor import RetryingExecutor -from .timeout_executor import TimeoutExecutor - - -TASK_PROCESSING_PLUGIN = "mesos_plugin" - - -def register_plugin(registry): - return ( - registry.register_task_executor("logging", MesosLoggingExecutor) - .register_deprecated_task_executor("mesos", MesosTaskExecutor) - .register_task_executor("mesos_task", MesosTaskExecutor) - .register_task_executor("mesos_pod", MesosPodExecutor) - .register_task_executor("retrying", RetryingExecutor) - .register_task_executor("timeout", TimeoutExecutor) - ) diff --git a/task_processing/plugins/mesos/constraints.py b/task_processing/plugins/mesos/constraints.py deleted file mode 100644 index 4b470ab8..00000000 --- a/task_processing/plugins/mesos/constraints.py +++ /dev/null @@ -1,65 +0,0 @@ -import re - -from pyrsistent import field -from pyrsistent import PRecord - - -def equals_op(expected_value, actual_value): - return expected_value == actual_value - - -def notequals_op(expected_value, actual_value): - return expected_value != actual_value - - -def like_op(re_pattern, actual_value): - return re.fullmatch(re_pattern, actual_value) - - -def unlike_op(re_pattern, actual_value): - return not like_op(re_pattern, actual_value) - - -OPERATORS = { - "EQUALS": equals_op, - "==": equals_op, - "NOTEQUALS": notequals_op, - "!=": notequals_op, - "LIKE": like_op, - "UNLIKE": unlike_op, -} - - -def _attributes_match_constraint(attributes, constraint): - actual_value = attributes.get(constraint.attribute) - # If the dictionary doesn't contain an attribute from the constraint then - # the constraint is satisfied. - if actual_value is None: - return True - - # The operator names have already been validated by the validator in - # `MesosTaskConfig`, so it's guaranteed that it's in `OPERATORS`. - return OPERATORS[constraint.operator](constraint.value, actual_value) - - -def attributes_match_constraints(attributes, constraints): - # If constraints aren't specified then they are satisfied. 
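The deleted `task_processing/plugins/mesos/__init__.py` above is also the clearest illustration of the plugin protocol itself: a plugin module exports a `TASK_PROCESSING_PLUGIN` name and a `register_plugin(registry)` hook that chains `register_task_executor` calls. A toy module of that shape is sketched below; the registration calls come from the deleted code, while `NoopExecutor` and its methods are hypothetical stand-ins.

```python
"""Hedged sketch of the plugin protocol demonstrated by the deleted
task_processing/plugins/mesos/__init__.py: export TASK_PROCESSING_PLUGIN
and a register_plugin(registry) hook. NoopExecutor is hypothetical; only
the registration shape comes from this diff.
"""

TASK_PROCESSING_PLUGIN = "noop_plugin"


class NoopExecutor:
    """Hypothetical do-nothing executor, purely to show registration."""

    def run(self, task_config):
        pass

    def stop(self):
        pass


def register_plugin(registry):
    # register_task_executor chains fluently, as the deleted mesos
    # plugin's register_plugin did when wiring up several executors.
    return registry.register_task_executor("noop", NoopExecutor)
```

A processor would then load it with `processor.load_plugin(provider_module="my_package.noop")` (a hypothetical module path) and build it via `executor_from_config(provider="noop", ...)`, mirroring the flow in the deleted examples.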
- if constraints is None: - return True - - return all(_attributes_match_constraint(attributes, c) for c in constraints) - - -def valid_constraint_operator_name(name): - operators_names = OPERATORS.keys() - return ( - name in operators_names, - "{operator} is not a valid operator, valid operators are " - "{operators}.".format(operator=name, operators=operators_names), - ) - - -class Constraint(PRecord): - attribute = field(type=str) - operator = field(type=str, invariant=valid_constraint_operator_name) - value = field(type=str) diff --git a/task_processing/plugins/mesos/execution_framework.py b/task_processing/plugins/mesos/execution_framework.py deleted file mode 100644 index 357143f5..00000000 --- a/task_processing/plugins/mesos/execution_framework.py +++ /dev/null @@ -1,693 +0,0 @@ -import logging -import socket -import threading -import time -from collections import defaultdict -from queue import Queue -from typing import Optional # noqa, flake8 issue -from typing import TYPE_CHECKING - -from addict import Dict -from pymesos.interface import Scheduler -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import PVector -from pyrsistent import v - -from task_processing.interfaces.event import control_event -from task_processing.interfaces.event import task_event -from task_processing.metrics import create_counter -from task_processing.metrics import create_timer -from task_processing.metrics import get_metric -from task_processing.plugins.mesos import metrics -from task_processing.plugins.mesos.resource_helpers import get_offer_resources - - -if TYPE_CHECKING: - from .mesos_executor import MesosExecutorCallbacks # noqa - - -log = logging.getLogger(__name__) - - -class TaskMetadata(PRecord): - agent_id = field(type=str, initial="") - task_config = field(type=PRecord, mandatory=True) - task_state = field(type=str, mandatory=True) - task_state_history = field(type=PMap, factory=pmap, mandatory=True) - - -class ExecutionFramework(Scheduler): - callbacks: "MesosExecutorCallbacks" - - def __init__( - self, - name, - role, - callbacks: "MesosExecutorCallbacks", - task_staging_timeout_s, - pool=None, - slave_blacklist_timeout_s=900, - offer_backoff=10, - suppress_delay=10, - initial_decline_delay=1, - task_reconciliation_delay=300, - framework_id=None, - failover_timeout=604800, # 1 week - ) -> None: - self.name = name - # wait this long for a task to launch. 
- self.task_staging_timeout_s = task_staging_timeout_s - self.pool = pool - self.role = role - self.callbacks = callbacks - self.slave_blacklist_timeout_s = slave_blacklist_timeout_s - self.offer_backoff = offer_backoff - - # TODO: why does this need to be root, can it be "mesos plz figure out" - self.framework_info = Dict( - user="root", - name=self.name, - checkpoint=True, - role=self.role, - failover_timeout=failover_timeout, - ) - if framework_id: - self.framework_info["id"] = {"value": framework_id} - - self.task_queue: Queue = Queue() - self.event_queue: Queue = Queue() - self._driver: Optional[Scheduler] = None - self.are_offers_suppressed = False - self.suppress_after = int(time.time()) + suppress_delay - self.decline_after = time.time() + initial_decline_delay - self._task_reconciliation_delay = task_reconciliation_delay - self._reconcile_tasks_at = time.time() + self._task_reconciliation_delay - - self.offer_decline_filter = Dict(refuse_seconds=self.offer_backoff) - self._lock = threading.RLock() - self.blacklisted_slaves: PVector = v() - self.task_metadata: PMap = m() - - self._initialize_metrics() - self._last_offer_time: Optional[float] = None - self._terminal_task_counts = { - "TASK_FINISHED": metrics.TASK_FINISHED_COUNT, - "TASK_LOST": metrics.TASK_LOST_COUNT, - "TASK_KILLED": metrics.TASK_KILLED_COUNT, - "TASK_FAILED": metrics.TASK_FAILED_COUNT, - "TASK_ERROR": metrics.TASK_ERROR_COUNT, - "TASK_OFFER_TIMEOUT": metrics.TASK_OFFER_TIMEOUT, - } - - self.driver_error = object() - - self.stopping = False - task_kill_thread = threading.Thread(target=self._background_check, args=()) - task_kill_thread.daemon = True - task_kill_thread.start() - - def call_driver(self, method, *args, **kwargs): - if not self._driver: - log.error(f"{method} failed: No driver") - return self.driver_error - - try: - return getattr(self._driver, method)(*args, **kwargs) - except (socket.timeout, Exception) as e: - log.warning(f"{method} failed: {str(e)}") - return self.driver_error - - def _background_check_task(self, time_now, tasks_to_reconcile, task_id, md): - if md.task_state != "TASK_INITED": - tasks_to_reconcile.append(task_id) - - if md.task_state == "TASK_INITED": - # give up if the task hasn't launched after - # offer_timeout - inited_at = md.task_state_history["TASK_INITED"] - offer_timeout = md.task_config.offer_timeout - expires_at = inited_at + offer_timeout - if time_now >= expires_at: - log.warning( - f"Task {task_id} has been waiting for offers " - "for longer than configured timeout " - f"{offer_timeout}. Giving up and removing the " - "task from the task queue." 
- ) - # killing the task will also remove them from the queue - self.kill_task(task_id) - # we are not expecting mesos to send terminal update - # for this task, so cleaning it up manually - self.task_metadata = self.task_metadata.discard(task_id) - self.event_queue.put( - task_event( - task_id=task_id, - terminal=True, - timestamp=time_now, - success=False, - message="stop", - task_config=md.task_config, - raw="Failed due to offer timeout", - ) - ) - get_metric(metrics.TASK_OFFER_TIMEOUT).count(1) - - # Task is not eligible for killing or reenqueuing - in_current_state_since = md.task_state_history[md.task_state] - if time_now < in_current_state_since + self.task_staging_timeout_s: - return - - if md.task_state == "UNKNOWN": - log.warning( - f"Re-enqueuing task {task_id} in unknown state for " - f"longer than {self.task_staging_timeout_s}" - ) - # Re-enqueue task - self.enqueue_task(md.task_config) - get_metric(metrics.TASK_FAILED_TO_LAUNCH_COUNT).count(1) - elif md.task_state == "TASK_STAGING": - log.warning(f"Killing stuck task {task_id}") - self.kill_task(task_id) - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state="TASK_STUCK", - task_state_history=md.task_state_history.set( - "TASK_STUCK", time_now - ), - ), - ) - self.blacklist_slave( - agent_id=self.task_metadata[task_id].agent_id, - timeout=self.slave_blacklist_timeout_s, - ) - get_metric(metrics.TASK_STUCK_COUNT).count(1) - elif md.task_state == "TASK_STUCK": - t = time.time() - # 10s since last iteration + time we spent in current one - time_delta = 10 + t - time_now - # seconds since task was put in TASK_STUCK state - time_stuck = t - md.task_state_history["TASK_STUCK"] - # seconds since `time_stuck` crossed another hour - # boundary - hour_rolled = time_stuck % 3600 - - # if `time_stuck` crossed hour boundary since last - # background check - lets re-send kill request - if hour_rolled < time_delta: - hours_stuck = time_stuck // 3600 - log.warning( - f"Task {task_id} is stuck, waiting for terminal " - f"state for {hours_stuck}h, sending another kill" - ) - self.kill_task(task_id) - - def _background_check(self): - while True: - if self.stopping: - return - - time_now = time.time() - tasks_to_reconcile = [] - with self._lock: - for task_id, md in self.task_metadata.items(): - self._background_check_task( - time_now, - tasks_to_reconcile, - task_id, - md, - ) - - self._reconcile_tasks( - [ - Dict({"task_id": Dict({"value": task_id})}) - for task_id in tasks_to_reconcile - ] - ) - elapsed = time.time() - time_now - log.info(f"background check done in {elapsed}s") - get_metric(metrics.BGCHECK_TIME_TIMER).record(elapsed) - time.sleep(10) - - def reconcile_task(self, task_config): - task_id = task_config.task_id - with self._lock: - if task_id in self.task_metadata: - md = self.task_metadata[task_id] - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state="TASK_RECONCILING", - task_state_history=md.task_state_history.set( - "TASK_RECONCILING", time.time() - ), - ), - ) - else: - log.info(f"Adding {task_id} to metadata for reconciliation") - self.task_metadata = self.task_metadata.set( - task_id, - TaskMetadata( - task_config=task_config, - task_state="TASK_RECONCILING", - task_state_history=m(TASK_RECONCILING=time.time()), - ), - ) - self._reconcile_tasks([Dict({"task_id": Dict({"value": task_id})})]) - - def _reconcile_tasks(self, tasks_to_reconcile): - if time.time() < self._reconcile_tasks_at: - return - - log.info(f"Reconciling following tasks {tasks_to_reconcile}") - - if 
len(tasks_to_reconcile) > 0: - self.call_driver("reconcileTasks", tasks_to_reconcile) - - self._reconcile_tasks_at += self._task_reconciliation_delay - - def offer_matches_pool(self, offer): - if self.pool is None: - # If pool is not specified, then we can accept offer from any agent - return True, None - - for attribute in offer.attributes: - if attribute.name == "pool": - return attribute.text.value == self.pool, attribute.text.value - - return False, None - - def kill_task(self, task_id): - tmp_list = [] - flag = False - with self._lock: - while not self.task_queue.empty(): - t = self.task_queue.get() - if task_id == t.task_id: - flag = True - self.task_metadata = self.task_metadata.discard(task_id) - else: - tmp_list.append(t) - - for t in tmp_list: - self.task_queue.put(t) - - if flag is False: - if self.call_driver("killTask", Dict(value=task_id)) is self.driver_error: - return False - - return True - - def blacklist_slave(self, agent_id, timeout): - with self._lock: - # A new entry is appended even if the agent is being blacklisted. - # This is equivalent to restarting the blacklist timer. - log.info(f"Blacklisting slave: {agent_id} for {timeout} seconds.") - self.blacklisted_slaves = self.blacklisted_slaves.append(agent_id) - get_metric(metrics.BLACKLISTED_AGENTS_COUNT).count(1) - - unblacklist_thread = threading.Thread( - target=self.unblacklist_slave, - kwargs={"timeout": timeout, "agent_id": agent_id}, - ) - unblacklist_thread.daemon = True - unblacklist_thread.start() - - def unblacklist_slave(self, agent_id, timeout): - time.sleep(timeout) - log.info(f"Unblacklisting slave: {agent_id}") - with self._lock: - self.blacklisted_slaves = self.blacklisted_slaves.remove(agent_id) - - def enqueue_task(self, task_config): - with self._lock: - # task_state and task_state_history get reset every time - # a task is enqueued. - self.task_metadata = self.task_metadata.set( - task_config.task_id, - TaskMetadata( - task_config=task_config, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ), - ) - # Need to lock on task_queue to prevent enqueues when getting - # tasks to launch - self.task_queue.put(task_config) - - if self.are_offers_suppressed: - if self.call_driver("reviveOffers") is not self.driver_error: - self.are_offers_suppressed = False - log.info("Reviving offers because we have tasks to run.") - - get_metric(metrics.TASK_ENQUEUED_COUNT).count(1) - - def launch_tasks_for_offer(self, offer, tasks_to_launch) -> bool: - mesos_protobuf_tasks = [ - self.callbacks.make_mesos_protobuf( - task_config, offer.agent_id.value, self.role - ) - for task_config in tasks_to_launch - if task_config.task_id in self.task_metadata - ] - if not mesos_protobuf_tasks: - return False - - launched = True - launch_time = time.time() - if ( - self.call_driver("launchTasks", offer.id, mesos_protobuf_tasks) - is self.driver_error - ): - tasks = ", ".join(task.task_id for task in tasks_to_launch) - log.warning(f"Failed to launch: {tasks}, moving them to UNKNOWN state") - get_metric(metrics.TASK_LAUNCH_FAILED_COUNT).count(1) - launched = False - - # 'UNKNOWN' state is for internal tracking. It will not be - # propogated to users. - current_task_state = "TASK_STAGING" if launched else "UNKNOWN" - - for task in tasks_to_launch: - md = self.task_metadata.get(task.task_id) - if not md: - log.warning( - f"trying to launch task {task.task_id}, but it is not in task metadata." 
- f"current keys in task_metadata: {self.task_metadata.keys()}" - ) - continue - self.task_metadata = self.task_metadata.set( - task.task_id, - md.set( - task_state=current_task_state, - task_state_history=md.task_state_history.set( - current_task_state, launch_time - ), - agent_id=str(offer.agent_id.value), - ), - ) - - get_metric(metrics.TASK_QUEUED_TIME_TIMER).record( - launch_time - md.task_state_history["TASK_INITED"] - ) - - # Emit the staging event for successful launches - if launched: - self.event_queue.put( - self.callbacks.handle_status_update( - Dict(state="TASK_STAGING", offer=offer), - md.task_config, - ) - ) - get_metric(metrics.TASK_LAUNCHED_COUNT).count(1) - - return launched - - def stop(self): - self.stopping = True - - # TODO: add mesos cluster dimension when available - def _initialize_metrics(self): - default_dimensions = { - "framework_name": ".".join(self.name.split()[:2]), - "framework_role": self.role, - } - - counters = [ - metrics.TASK_LAUNCHED_COUNT, - metrics.TASK_FINISHED_COUNT, - metrics.TASK_FAILED_COUNT, - metrics.TASK_KILLED_COUNT, - metrics.TASK_LOST_COUNT, - metrics.TASK_ERROR_COUNT, - metrics.TASK_ENQUEUED_COUNT, - metrics.TASK_INSUFFICIENT_OFFER_COUNT, - metrics.TASK_STUCK_COUNT, - metrics.BLACKLISTED_AGENTS_COUNT, - metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT, - metrics.TASK_LAUNCH_FAILED_COUNT, - metrics.TASK_FAILED_TO_LAUNCH_COUNT, - metrics.TASK_OFFER_TIMEOUT, - ] - for cnt in counters: - create_counter(cnt, default_dimensions) - - timers = [ - metrics.OFFER_DELAY_TIMER, - metrics.TASK_QUEUED_TIME_TIMER, - metrics.BGCHECK_TIME_TIMER, - ] - for tmr in timers: - create_timer(tmr, default_dimensions) - - #################################################################### - # Mesos driver hooks go here # - #################################################################### - def offerRescinded(self, driver, offerId): - # TODO(sagarp): Executor should be able to deal with this. 
- log.warning(f"Offer {offerId} rescinded") - - def error(self, driver, message): - event = control_event(raw=message) - - # TODO: have a mapper function similar to translator of task events - if message == "Framework has been removed": - event = event.set(message="stop") - else: - event = event.set(message="unknown") - - self.event_queue.put(event) - - def slaveLost(self, drive, slaveId): - log.warning(f"Slave lost: {str(slaveId)}") - - def registered(self, driver, frameworkId, masterInfo): - self._driver = driver - event = control_event( - raw={ - "master_info": masterInfo, - "framework_id": frameworkId, - }, - message="registered", - ) - self.event_queue.put(event) - log.info( - f"Registered with framework ID {frameworkId.value} and role {self.role}" - ) - - def reregistered(self, driver, masterInfo): - self._driver = driver - log.warning(f"Re-registered to {masterInfo} with role {self.role}") - - def resourceOffers(self, driver, offers) -> None: - self._driver = driver - - current_offer_time = time.time() - if self._last_offer_time is not None: - get_metric(metrics.OFFER_DELAY_TIMER).record( - current_offer_time - self._last_offer_time - ) - self._last_offer_time = current_offer_time - - # Give user some time to enqueue tasks - if self.task_queue.empty() and current_offer_time < self.decline_after: - time.sleep(self.decline_after - current_offer_time) - - declined: dict = defaultdict(list) - declined_offer_ids = [] - accepted = [] - - with self._lock: - if self.task_queue.empty(): - # Always suppress offers when there is nothing to run - if self.call_driver("suppressOffers") is not self.driver_error: - self.are_offers_suppressed = True - log.info("Suppressing offers, no more tasks to run.") - - for offer in offers: - declined["no tasks"].append(offer.id.value) - declined_offer_ids.append(offer.id) - - self.call_driver( - "declineOffer", declined_offer_ids, self.offer_decline_filter - ) - log.info( - f"Offers declined because of no tasks: {','.join(declined['no tasks'])}" - ) - return - - with_maintenance_window = [offer for offer in offers if offer.unavailability] - - for offer in with_maintenance_window: - start_time = offer.unavailability.start["nanoseconds"] - completion_time = int( - (start_time + offer.unavailability.duration["nanoseconds"]) / 1000000000 - ) - now = int(time.time()) - duration = completion_time - now - if duration > 0: - self.blacklist_slave( - agent_id=offer.agent_id.value, - timeout=duration, - ) - - without_maintenance_window = [ - offer for offer in offers if offer not in with_maintenance_window - ] - for offer in without_maintenance_window: - with self._lock: - if offer.agent_id.value in self.blacklisted_slaves: - declined["blacklisted"].append( - f"offer {offer.id.value} agent {offer.agent_id.value}" - ) - declined_offer_ids.append(offer.id) - continue - - offer_pool_match, offer_pool = self.offer_matches_pool(offer) - if not offer_pool_match: - log.info( - f"Declining offer {offer.id.value}, required pool " - f"{self.pool} doesn't match offered pool {offer_pool}" - ) - declined["bad pool"].append(offer.id.value) - declined_offer_ids.append(offer.id) - continue - - # Need to lock here even though we are only reading the task_queue, since - # we are predicating on the queue's emptiness. If not locked, other - # threads can continue enqueueing, and we never terminate the loop. 
- task_configs = [] - with self._lock: - while not self.task_queue.empty(): - task_configs.append(self.task_queue.get()) - - offer_resources = get_offer_resources(offer, self.role) - offer_attributes = { - attribute.name: attribute.text.value - for attribute in offer.attributes - } - log.info( - f"Received offer {offer.id.value} for role {self.role}: {offer_resources}" - ) - tasks_to_launch, tasks_to_defer = self.callbacks.get_tasks_for_offer( - task_configs, - offer_resources, - offer_attributes, - self.role, - ) - - for task in tasks_to_defer: - self.task_queue.put(task) - get_metric(metrics.TASK_INSUFFICIENT_OFFER_COUNT).count( - len(tasks_to_defer) - ) - - if len(tasks_to_launch) == 0: - declined["bad resources"].append(offer.id.value) - declined_offer_ids.append(offer.id) - continue - - ignored_tasks = ",".join( - task_config.task_id - for task_config in tasks_to_launch - if task_config.task_id not in self.task_metadata - ) - if ignored_tasks: - log.warning(f"ignoring tasks not in metadata: {ignored_tasks}") - - tasks_to_launch = [ - task_config - for task_config in tasks_to_launch - if task_config.task_id in self.task_metadata - ] - - if len(tasks_to_launch) == 0: - declined["nothing to launch"].append(offer.id.value) - declined_offer_ids.append(offer.id) - elif not self.launch_tasks_for_offer(offer, tasks_to_launch): - declined["launch failed"].append(offer.id.value) - declined_offer_ids.append(offer.id) - else: - accepted.append( - f"offer: {offer.id.value} " - f"agent: {offer.agent_id.value} " - f"tasks: {len(tasks_to_launch)}" - ) - - if len(declined_offer_ids) > 0: - self.call_driver( - "declineOffer", declined_offer_ids, self.offer_decline_filter - ) - for reason, items in declined.items(): - log.info(f"Offers declined because {reason}: {', '.join(items)}") - if accepted: - log.info(f"Offers accepted: {', '.join(accepted)}") - - def statusUpdate(self, driver, update) -> None: - self._driver = driver - - task_id = update.task_id.value - task_state = str(update.state) - log.info(f"Task update {task_state} received for task {task_id}") - - if task_id not in self.task_metadata: - # We assume that a terminal status update has been - # received for this task already. - log.info( - "Ignoring this status update because a terminal status " - "update has been received for this task already." - ) - self.call_driver("acknowledgeStatusUpdate", update) - return - - md = self.task_metadata[task_id] - - # If we attempt to accept an offer that has been invalidated by - # master for some reason such as offer has been rescinded or we - # have exceeded offer_timeout, then we will get TASK_LOST status - # update back from mesos master. - if task_state == "TASK_LOST" and str(update.reason) == "REASON_INVALID_OFFERS": - # This task has not been launched. Therefore, we are going to - # reenqueue it. We are not propogating any event up to the - # application. - log.warning( - "Received TASK_LOST from mesos master because we " - "attempted to accept an invalid offer. Going to " - f"re-enqueue this task {task_id}" - ) - # Re-enqueue task - self.enqueue_task(md.task_config) - get_metric(metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT).count(1) - self.call_driver("acknowledgeStatusUpdate", update) - return - - # Record state changes, send a new event and emit metrics only if the - # task state has actually changed. 
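The comment above states the idempotency rule the next block enforces: with explicit acknowledgements (the driver is constructed with implicit_acknowledgements=False later in this diff), Mesos re-delivers an update until it is acked, so the same state can arrive more than once, and only a genuine transition should touch metadata or emit an event. A minimal sketch of that check-then-ack shape, with hypothetical names standing in for the driver plumbing:

    def handle_status_update(known_states, update, emit, ack):
        """Emit only on a real state transition, but always ack."""
        task_id, state = update["task_id"], update["state"]
        if known_states.get(task_id) != state:
            known_states[task_id] = state
            emit((task_id, state))
        # Ack unconditionally: an unacknowledged update would simply
        # be re-delivered by the master.
        ack(update)

    events, acks = [], []
    seen: dict = {}
    dup = {"task_id": "t-1", "state": "TASK_RUNNING"}
    handle_status_update(seen, dup, events.append, acks.append)
    handle_status_update(seen, dup, events.append, acks.append)
    assert len(events) == 1 and len(acks) == 2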
- if md.task_state != task_state: - with self._lock: - self.task_metadata = self.task_metadata.set( - task_id, - md.set( - task_state=task_state, - task_state_history=md.task_state_history.set( - task_state, time.time() - ), - ), - ) - - self.event_queue.put( - self.callbacks.handle_status_update(update, md.task_config), - ) - - if task_state in self._terminal_task_counts: - with self._lock: - self.task_metadata = self.task_metadata.discard(task_id) - get_metric(self._terminal_task_counts[task_state]).count(1) - - # We have to do this because we are not using implicit - # acknowledgements. - self.call_driver("acknowledgeStatusUpdate", update) diff --git a/task_processing/plugins/mesos/logging_executor.py b/task_processing/plugins/mesos/logging_executor.py deleted file mode 100644 index f77cca12..00000000 --- a/task_processing/plugins/mesos/logging_executor.py +++ /dev/null @@ -1,269 +0,0 @@ -import logging -import sys -import time -from queue import Queue -from threading import Lock -from threading import Thread -from urllib.parse import urlparse - -import requests -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import v - -from task_processing.interfaces.task_executor import TaskExecutor - - -log = logging.getLogger(__name__) -logging.getLogger("urllib3").setLevel(logging.WARNING) - -# Read task log in 4K chunks -TASK_LOG_CHUNK_LEN = 4096 -DEFAULT_FORMAT = "{task_id}[{container_id}@{agent}]: {line}" -LOG_REQUEST_TIMEOUT = 5 # seconds - - -class LogMetadata(PRecord): - log_url = field(type=str, initial="") - log_path = field(type=str, initial="") - log_offsets = field( - type=PMap, factory=pmap, initial=pmap({"stdout": 0, "stderr": 0}) - ) - container_id = field(type=str, initial="") - executor_id = field(type=str, initial="") - - -def standard_handler(task_id, message, stream): - print(message, file=sys.stderr if stream == "stderr" else sys.stdout) - - -class MesosLoggingExecutor(TaskExecutor): - def __init__( - self, - downstream_executor, - handler=standard_handler, - format_string=DEFAULT_FORMAT, - ): - self.downstream_executor = downstream_executor - self.TASK_CONFIG_INTERFACE = downstream_executor.TASK_CONFIG_INTERFACE - self.handler = handler - self.format_string = format_string - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.staging_tasks = m() - self.running_tasks = m() - self.done_tasks = v() - - # A lock is needed to synchronize logging and event processing - self.task_lock = Lock() - - self.event_thread = Thread(target=self.event_loop) - self.event_thread.daemon = True - self.event_thread.start() - - self.logging_thread = Thread(target=self.logging_loop) - self.logging_thread.daemon = True - self.logging_thread.start() - - def log_line(self, stream, line, task_id, container_id, agent): - formatted_line = self.format_string.format( - task_id=task_id, - container_id=container_id, - agent=agent, - line=line, - ) - self.handler(task_id, formatted_line, stream) - - def set_task_log_path(self, task_id): - log_md = self.running_tasks[task_id] - if log_md.log_url is None: - log.error(f"No log_url available for {task_id}") - return - try: - response = requests.get( - log_md.log_url + "/files/debug", - timeout=LOG_REQUEST_TIMEOUT, - ).json() - except Exception as e: - log.error("Failed to fetch files {error}".format(error=e)) - return - - for key in response.keys(): - if log_md.executor_id in key and 
log_md.container_id in key: - with self.task_lock: - self.running_tasks = self.running_tasks.set( - task_id, - log_md.set(log_path=key), - ) - break - - def stream_task_log(self, task_id): - if self.running_tasks[task_id].log_path == "": - self.set_task_log_path(task_id) - - # Abort in case the log path discovery was not successful - log_md = self.running_tasks[task_id] - if log_md.log_path == "": - return - - offsets = { - "stdout": log_md.log_offsets["stdout"], - "stderr": log_md.log_offsets["stderr"], - } - agent = urlparse(log_md.log_url).hostname - - for f in ["stdout", "stderr"]: - offset = offsets[f] - log_path = log_md.log_path + "/" + f - while True: - payload = { - "path": log_path, - "length": str(TASK_LOG_CHUNK_LEN), - "offset": str(offset), - } - - try: - response = requests.get( - log_md.log_url + "/files/read", - params=payload, - timeout=LOG_REQUEST_TIMEOUT, - ).json() - - log_length = len(response["data"]) - for line in response["data"].splitlines(): - self.log_line( - stream=f, - line=line, - task_id=task_id, - container_id=log_md.container_id, - agent=agent, - ) - except Exception as e: - log.error( - "Failed to get {path}@{agent} {error}".format( - path=log_path, agent=agent, error=e - ) - ) - break - - offset = offset + log_length - # Stop if there is no more data - if log_length < TASK_LOG_CHUNK_LEN: - break - # Update offset of this stream - offsets[f] = offset - - # Update both offsets for the task - with self.task_lock: - self.running_tasks = self.running_tasks.set( - task_id, - log_md.set(log_offsets=pmap(offsets)), - ) - - # process downstream events - def event_loop(self): - while True: - while not self.src_queue.empty(): - e = self.src_queue.get() - self.dest_queue.put(e) - self.src_queue.task_done() - - # Record the base log url - if e.kind == "task" and e.platform_type == "staging": - if e.task_id in self.staging_tasks: - continue - url = extract_url_from_offer(e.raw.offer) - self.staging_tasks = self.staging_tasks.set(e.task_id, url) - - if e.kind == "task" and e.platform_type == "running": - if e.task_id not in self.staging_tasks: - log.info(f"Task {e.task_id} already running, not fetching logs") - continue - - url = self.staging_tasks[e.task_id] - self.staging_tasks = self.staging_tasks.discard(e.task_id) - - # Simply pass the needed fields and let the logging thread - # to take care of the slow path discovery. 
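The slow path referred to above ends up in stream_task_log, and the core of it is paging through the agent's /files/read endpoint: ask for TASK_LOG_CHUNK_LEN bytes at the current offset, advance the offset by however much came back, and stop on a short read. The same loop, condensed into a self-contained generator (the endpoint and parameters match the code in this file; the agent URL in the usage note is a placeholder):

    import requests

    TASK_LOG_CHUNK_LEN = 4096  # read task logs in 4K chunks

    def read_sandbox_file(base_url, path, timeout=5):
        """Yield successive chunks of a sandbox file via /files/read.

        The agent returns at most `length` bytes starting at `offset`;
        a short read means we have caught up with the end of the file.
        """
        offset = 0
        while True:
            response = requests.get(
                base_url + "/files/read",
                params={
                    "path": path,
                    "offset": str(offset),
                    "length": str(TASK_LOG_CHUNK_LEN),
                },
                timeout=timeout,
            ).json()
            data = response["data"]
            if data:
                yield data
            offset += len(data)
            if len(data) < TASK_LOG_CHUNK_LEN:
                return

    # e.g. (hypothetical agent address and sandbox path):
    # for chunk in read_sandbox_file("http://agent:5051", "/sandbox/stdout"):
    #     print(chunk, end="")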
- container_id = e.raw.container_status.container_id.value - executor_id = e.raw.executor_id.value - with self.task_lock: - self.running_tasks = self.running_tasks.set( - e.task_id, - LogMetadata( - log_url=url, - container_id=container_id, - executor_id=executor_id, - ), - ) - - # Fetch the last log and remove the entry if the task is active - if e.kind == "task" and e.terminal: - with self.task_lock: - if e.task_id in self.running_tasks: - self.done_tasks = self.done_tasks.append(e.task_id) - - if self.stopping: - return - - time.sleep(1) - - def logging_loop(self): - while True: - # grab logs - for task_id in self.running_tasks.keys(): - self.stream_task_log(task_id) - - while len(self.done_tasks): - task_id = self.done_tasks[0] - self.stream_task_log(task_id) - with self.task_lock: - self.done_tasks = self.done_tasks.remove(task_id) - self.running_tasks = self.running_tasks.discard(task_id) - - if self.stopping: - return - - time.sleep(10) - - def run(self, task_config): - self.downstream_executor.run(task_config) - - def stop(self): - self.downstream_executor.stop() - self.stopping = True - self.event_thread.join() - self.logging_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - return self.downstream_executor.kill(task_id) - - -def extract_url_from_offer(offer): - try: - url = ( - offer.url.scheme - + "://" - + offer.url.address.ip - + ":" - + str(offer.url.address.port) - ) - except Exception as exc: - log.error( - f"Error decoding the url for this offer: {offer.url}. " - f"Setting to None. Exception: {exc}" - ) - url = None - return url diff --git a/task_processing/plugins/mesos/mesos_executor.py b/task_processing/plugins/mesos/mesos_executor.py deleted file mode 100644 index 978e7d1b..00000000 --- a/task_processing/plugins/mesos/mesos_executor.py +++ /dev/null @@ -1,108 +0,0 @@ -import logging -import threading -from typing import Callable -from typing import List -from typing import NamedTuple -from typing import Tuple - -import addict -from pymesos import MesosSchedulerDriver - -from task_processing.interfaces.event import Event -from task_processing.interfaces.task_executor import TaskExecutor -from task_processing.plugins.mesos.execution_framework import ExecutionFramework -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -class MesosExecutorCallbacks(NamedTuple): - get_tasks_for_offer: Callable[ - [List[MesosTaskConfig], ResourceSet, dict, str], - Tuple[List[addict.Dict], List[MesosTaskConfig]], - ] - handle_status_update: Callable[ - [addict.Dict, MesosTaskConfig], - Event, - ] - make_mesos_protobuf: Callable[ - [MesosTaskConfig, str, str], - addict.Dict, - ] - - -class MesosExecutor(TaskExecutor): - def __init__( - self, - role: str, - callbacks: MesosExecutorCallbacks, - pool=None, - principal="taskproc", - secret=None, - mesos_address="127.0.0.1:5050", - initial_decline_delay=1.0, - framework_name="taskproc-default", - framework_staging_timeout=240, - framework_id=None, - failover=False, - ) -> None: - """ - Constructs the instance of a task execution, encapsulating all state - required to run, monitor and stop the job. 
- - TODO param docstrings - """ - - self.logger = logging.getLogger(__name__) - self.role = role - self.failover = failover - - self.execution_framework = ExecutionFramework( - role=role, - pool=pool, - name=framework_name, - callbacks=callbacks, - task_staging_timeout_s=framework_staging_timeout, - initial_decline_delay=initial_decline_delay, - framework_id=framework_id, - ) - - # TODO: Get mesos master ips from smartstack - self.driver = MesosSchedulerDriver( - sched=self.execution_framework, - framework=self.execution_framework.framework_info, - use_addict=True, - master_uri=mesos_address, - implicit_acknowledgements=False, - principal=principal, - secret=secret, - failover=failover, - ) - - # start driver thread immediately - self.stopping = False - self.driver_thread = threading.Thread(target=self._run_driver, args=()) - self.driver_thread.daemon = True - self.driver_thread.start() - - def _run_driver(self): - while not self.stopping: - self.driver.run() - self.logger.warning("Driver stopped, starting again") - - def run(self, task_config): - self.execution_framework.enqueue_task(task_config) - - def reconcile(self, task_config): - self.execution_framework.reconcile_task(task_config) - - def kill(self, task_id): - return self.execution_framework.kill_task(task_id) - - def stop(self): - self.stopping = True - self.execution_framework.stop() - self.driver.stop(failover=self.failover) - self.driver.join() - - def get_event_queue(self): - return self.execution_framework.event_queue diff --git a/task_processing/plugins/mesos/mesos_pod_executor.py b/task_processing/plugins/mesos/mesos_pod_executor.py deleted file mode 100644 index e869fdea..00000000 --- a/task_processing/plugins/mesos/mesos_pod_executor.py +++ /dev/null @@ -1,6 +0,0 @@ -from task_processing.plugins.mesos.mesos_executor import MesosExecutor - - -class MesosPodExecutor(MesosExecutor): - def __init__(self, role, *args, **kwargs) -> None: - raise NotImplementedError diff --git a/task_processing/plugins/mesos/mesos_task_executor.py b/task_processing/plugins/mesos/mesos_task_executor.py deleted file mode 100644 index 4acd51cc..00000000 --- a/task_processing/plugins/mesos/mesos_task_executor.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import List -from typing import Tuple - -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks -from task_processing.plugins.mesos.resource_helpers import allocate_task_resources -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.resource_helpers import task_fits -from task_processing.plugins.mesos.task_config import MesosTaskConfig -from task_processing.plugins.mesos.translator import make_mesos_task_info -from task_processing.plugins.mesos.translator import mesos_update_to_event - - -def get_tasks_for_offer( - task_configs: List[MesosTaskConfig], - offer_resources: ResourceSet, - offer_attributes: dict, - role: str, -) -> Tuple[List[MesosTaskConfig], List[MesosTaskConfig]]: - tasks_to_launch, tasks_to_defer = [], [] - - for task_config in task_configs: - if task_fits(task_config, offer_resources) and attributes_match_constraints( - offer_attributes, task_config.constraints - ): - prepared_task_config, offer_resources = allocate_task_resources( - task_config, - offer_resources, - ) - tasks_to_launch.append(prepared_task_config) - else: - 
tasks_to_defer.append(task_config) - - return tasks_to_launch, tasks_to_defer - - -class MesosTaskExecutor(MesosExecutor): - TASK_CONFIG_INTERFACE = MesosTaskConfig - - def __init__(self, role, *args, **kwargs) -> None: - super().__init__( - role, - MesosExecutorCallbacks( - get_tasks_for_offer, - mesos_update_to_event, - make_mesos_task_info, - ), - *args, - **kwargs, - ) diff --git a/task_processing/plugins/mesos/metrics.py b/task_processing/plugins/mesos/metrics.py deleted file mode 100644 index b2275fd9..00000000 --- a/task_processing/plugins/mesos/metrics.py +++ /dev/null @@ -1,22 +0,0 @@ -TASK_LAUNCHED_COUNT = "taskproc.mesos.task_launched_count" -TASK_FAILED_TO_LAUNCH_COUNT = "taskproc.mesos.tasks_failed_to_launch_count" -TASK_LAUNCH_FAILED_COUNT = "taskproc.mesos.task_launch_failed_count" -TASK_FINISHED_COUNT = "taskproc.mesos.task_finished_count" -TASK_FAILED_COUNT = "taskproc.mesos.task_failure_count" -TASK_KILLED_COUNT = "taskproc.mesos.task_killed_count" -TASK_LOST_COUNT = "taskproc.mesos.task_lost_count" -TASK_LOST_DUE_TO_INVALID_OFFER_COUNT = ( - "taskproc.mesos.task_lost_due_to_invalid_offer_count" -) -TASK_ERROR_COUNT = "taskproc.mesos.task_error_count" -TASK_OFFER_TIMEOUT = "taskproc.mesos.task_offer_timeout" - -TASK_ENQUEUED_COUNT = "taskproc.mesos.task_enqueued_count" -TASK_QUEUED_TIME_TIMER = "taskproc.mesos.task_queued_time" -TASK_INSUFFICIENT_OFFER_COUNT = "taskproc.mesos.task_insufficient_offer_count" -TASK_STUCK_COUNT = "taskproc.mesos.task_stuck_count" - -OFFER_DELAY_TIMER = "taskproc.mesos.offer_delay" -BLACKLISTED_AGENTS_COUNT = "taskproc.mesos.blacklisted_agents_count" - -BGCHECK_TIME_TIMER = "taskproc.mesos.bgcheck_time" diff --git a/task_processing/plugins/mesos/resource_helpers.py b/task_processing/plugins/mesos/resource_helpers.py deleted file mode 100644 index ebb65965..00000000 --- a/task_processing/plugins/mesos/resource_helpers.py +++ /dev/null @@ -1,97 +0,0 @@ -from typing import Tuple -from typing import TYPE_CHECKING - -import addict -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PRecord -from pyrsistent import PVector -from pyrsistent import pvector -from pyrsistent import v - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - -NUMERIC_RESOURCE = field( - type=float, - initial=0.0, - factory=float, - invariant=lambda x: (x >= 0, "resource < 0"), -) -_NUMERIC_RESOURCES = frozenset(["cpus", "mem", "disk", "gpus"]) - - -class ResourceSet(PRecord): - cpus = NUMERIC_RESOURCE - mem = NUMERIC_RESOURCE - disk = NUMERIC_RESOURCE - gpus = NUMERIC_RESOURCE - ports = field( - type=(PVector[PMap] if TYPE_CHECKING else PVector), initial=v(), factory=pvector - ) - - -def get_offer_resources(offer: addict.Dict, role: str) -> ResourceSet: - """Get the resources from a Mesos offer - - :param offer: the payload from a Mesos resourceOffer call - :param role: the Mesos role we want to get resources for - :returns: a mapping from resource name -> available resources for the offer - """ - res = ResourceSet() - for resource in offer.resources: - if resource.role != role: - continue - - if resource.name in _NUMERIC_RESOURCES: - res = res.set(resource.name, resource.scalar.value) - elif resource.name == "ports": - res = res.set("ports", [pmap(r) for r in resource.ranges.range]) - return res - - -def allocate_task_resources( - task_config: MesosTaskConfig, - offer_resources: ResourceSet, -) -> Tuple[MesosTaskConfig, ResourceSet]: - """Allocate a task's resources to 
a Mesos offer - - :param task: the specification for the task to allocate - :param offer_resources: a mapping of resource name -> available resources - (should come from :func:`get_offer_resources`) - :returns: a pair of (`prepared_task_config`, `remaining_resources`), where - `prepared_task_config` is the task_config object modified with the - actual resources consumed - """ - for res, val in offer_resources.items(): - if res not in _NUMERIC_RESOURCES: - continue - offer_resources = offer_resources.set(res, val - task_config[res]) - - port = offer_resources.ports[0]["begin"] - if offer_resources.ports[0]["begin"] == offer_resources.ports[0]["end"]: - avail_ports = offer_resources.ports[1:] - else: - new_port_range = offer_resources.ports[0].set("begin", port + 1) - avail_ports = offer_resources.ports.set(0, new_port_range) - offer_resources = offer_resources.set("ports", avail_ports) - task_config = task_config.set("ports", v(m(begin=port, end=port))) - return task_config, offer_resources - - -def task_fits(task: MesosTaskConfig, offer_resources: ResourceSet) -> bool: - """Check to see if a task fits a given offer's resources - - :param task: the task specification to check - :param offer_resources: a mapping of resource name -> available resources - (should come from :func:`get_offer_resources`) - :returns: True if the offer has enough resources for the task, False otherwise - """ - for rname, value in offer_resources.items(): - if rname in _NUMERIC_RESOURCES and task[rname] > value: - return False - elif rname == "ports" and len(value) == 0: # TODO validate port ranges - return False - - return True diff --git a/task_processing/plugins/mesos/retrying_executor.py b/task_processing/plugins/mesos/retrying_executor.py deleted file mode 100644 index cc1bbd10..00000000 --- a/task_processing/plugins/mesos/retrying_executor.py +++ /dev/null @@ -1,167 +0,0 @@ -import logging -import time -from queue import Queue -from threading import Lock -from threading import Thread - -from pyrsistent import m - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - - -class RetryingExecutor(TaskExecutor): - def __init__( - self, downstream_executor, retry_pred=lambda e: not e.success, retries=3 - ): - self.executor = downstream_executor - self.retries = retries - self.retry_pred = retry_pred - - self.task_retries = m() - self.task_retries_lock = Lock() - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.retry_thread = Thread(target=self.retry_loop) - self.retry_thread.daemon = True - self.retry_thread.start() - - def event_with_retries(self, event): - return event.transform( - ("extensions", "RetryingExecutor/tries"), - "{}/{}".format(self.task_retries[event.task_id], self.retries), - ) - - def retry(self, event): - retries_remaining = self.task_retries[event.task_id] - if retries_remaining <= 0: - return False - - total_retries = self._task_or_executor_retries(event.task_config) - log.info( - "Retrying task {}, {} of {}, fail event: {}".format( - event.task_config.name, - total_retries - retries_remaining + 1, - total_retries, - event.raw, - ) - ) - - with self.task_retries_lock: - self.task_retries = self.task_retries.set( - event.task_id, retries_remaining - 1 - ) - self.run(event.task_config) - - return True - - def retry_loop(self): - while True: - while not self.src_queue.empty(): - e = self.src_queue.get() - - if e.kind != "task": - self.dest_queue.put(e) - continue - - # This is to 
remove trailing '-retry*' - original_task_id = "-".join( - [item for item in e.task_id.split("-")[:-1]] - ) - - # Check if the update is for current attempt. Discard if - # it is not. - if not self._is_current_attempt(e, original_task_id): - continue - - # Set the task id back to original task_id - e = self._restore_task_id(e, original_task_id) - - e = self.event_with_retries(e) - - if e.terminal: - if self.retry_pred(e): - if self.retry(e): - continue - - with self.task_retries_lock: - self.task_retries = self.task_retries.remove(e.task_id) - - self.dest_queue.put(e) - - if self.stopping: - return - - time.sleep(1) - - def run(self, task_config): - if task_config.task_id not in self.task_retries: - with self.task_retries_lock: - self.task_retries = self.task_retries.set( - task_config.task_id, self._task_or_executor_retries(task_config) - ) - self.executor.run(self._task_config_with_retry(task_config)) - - def reconcile(self, task_config): - self.executor.reconcile(task_config) - - def kill(self, task_id): - # retries = -1 so that manually killed tasks can be distinguished - with self.task_retries_lock: - self.task_retries = self.task_retries.set(task_id, -1) - return self.executor.kill(task_id) - - def stop(self): - self.executor.stop() - self.stopping = True - self.retry_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def _task_config_with_retry(self, task_config): - return task_config.set( - uuid="{id}-retry{attempt}".format( - id=task_config.uuid, attempt=self.task_retries[task_config.task_id] - ) - ) - - def _restore_task_id(self, e, original_task_id): - task_config = e.task_config.set( - uuid="-".join([item for item in str(e.task_config.uuid).split("-")[:-1]]) - ) - - # Set the task id back to original task_id - return e.set( - task_id=original_task_id, - task_config=task_config, - ) - - def _is_current_attempt(self, e, original_task_id): - retry_suffix = "-".join([item for item in e.task_id.split("-")[-1:]]) - - # This is to extract retry attempt from retry_suffix - # eg: if retry_suffix= 'retry2', then attempt==2 - attempt = int(retry_suffix[5:]) - - # This is to reregister a task with the retry executor in the event - # of reconciliation and attempts were lost - with self.task_retries_lock: - if original_task_id not in self.task_retries: - self.task_retries = self.task_retries.set( - original_task_id, - attempt, - ) - return True - - if attempt == self.task_retries[original_task_id]: - return True - - return False - - def _task_or_executor_retries(self, task_config): - return task_config.retries if "retries" in task_config else self.retries diff --git a/task_processing/plugins/mesos/task_config.py b/task_processing/plugins/mesos/task_config.py deleted file mode 100644 index 7b7ca5a8..00000000 --- a/task_processing/plugins/mesos/task_config.py +++ /dev/null @@ -1,135 +0,0 @@ -import uuid -from typing import Sequence -from typing import TYPE_CHECKING - -from pyrsistent import field -from pyrsistent import m -from pyrsistent import PMap -from pyrsistent import pmap -from pyrsistent import PVector -from pyrsistent import pvector -from pyrsistent import v - -from task_processing.interfaces.task_executor import DefaultTaskConfigInterface -from task_processing.plugins.mesos.constraints import Constraint -from task_processing.plugins.mesos.constraints import valid_constraint_operator_name - -VOLUME_KEYS = set(["mode", "container_path", "host_path"]) - - -def valid_volumes(volumes): - for vol in volumes: - if set(vol.keys()) != VOLUME_KEYS: - return ( - False, - 
"Invalid volume format, must only contain following keys: " - "{}, was: {}".format(VOLUME_KEYS, vol.keys()), - ) - return (True, None) - - -def _valid_constraints(constraints): - invalid_operators = [] - for constraint in constraints: - operator_name = constraint.operator - if not valid_constraint_operator_name(operator_name): - invalid_operators.append(operator_name) - if len(invalid_operators) > 0: - return (False, "Invalid constraint operator names: {}".format(operator_name)) - else: - return (True, None) - - -class MesosTaskConfig(DefaultTaskConfigInterface): - def __invariant__(conf): - return ( - ( - "image" in conf if conf.containerizer == "DOCKER" else True, - "Image required for chosen containerizer", - ), - ( - len(conf.task_id) <= 255, - "task_id is longer than 255 chars: {}".format(conf.task_id), - ), - ) - - uuid = field(type=(str, uuid.UUID), initial=uuid.uuid4) # type: ignore - name = field(type=str, initial="default") - # image is optional for the mesos containerizer - image = field(type=str) - cmd = field( - type=str, mandatory=True, invariant=lambda cmd: (cmd.strip() != "", "empty cmd") - ) - cpus = field( - type=float, initial=0.1, factory=float, invariant=lambda c: (c > 0, "cpus > 0") - ) - mem = field( - type=float, - initial=32.0, - factory=float, - invariant=lambda m: (m >= 32, "mem is >= 32"), - ) - disk = field( - type=float, initial=10.0, factory=float, invariant=lambda d: (d > 0, "disk > 0") - ) - gpus = field( - type=int, initial=0, factory=int, invariant=lambda g: (g >= 0, "gpus >= 0") - ) - timeout = field( - type=float, - factory=float, - mandatory=False, - invariant=lambda t: (t > 0, "timeout > 0"), - ) - # By default, the retrying executor retries 3 times. This task option - # overrides the executor setting. - retries = field( - type=int, - factory=int, - mandatory=False, - invariant=lambda r: (r >= 0, "retries >= 0"), - ) - volumes = field(type=PVector, initial=v(), factory=pvector, invariant=valid_volumes) - ports = field( - type=(PVector[PMap] if TYPE_CHECKING else PVector), initial=v(), factory=pvector - ) - cap_add = field(type=PVector, initial=v(), factory=pvector) - ulimit = field(type=PVector, initial=v(), factory=pvector) - uris = field(type=PVector, initial=v(), factory=pvector) - # TODO: containerization + containerization_args ? 
- docker_parameters = field(type=PVector, initial=v(), factory=pvector) - containerizer = field( - type=str, - initial="DOCKER", - invariant=lambda c: ( - c == "DOCKER" or c == "MESOS", - "containerizer is docker or mesos", - ), - ) - environment = field(type=PMap, initial=m(), factory=pmap) - offer_timeout = field( - type=float, - initial=60.0, - factory=float, - invariant=lambda t: (t > 0, "timeout > 0"), - ) - constraints = field( - type=(Sequence[Constraint] if TYPE_CHECKING else PVector), - initial=v(), - factory=lambda c: pvector( - (Constraint(attribute=v[0], operator=v[1], value=v[2]) for v in c) - ), - invariant=_valid_constraints, - ) - use_cached_image = field(type=bool, initial=True, factory=bool) - - @property - def task_id(self): - return "{}.{}".format(self.name, self.uuid) - - def set_task_id(self, task_id): - try: - name, uuid = task_id.rsplit(".", maxsplit=1) - except ValueError: - raise ValueError(f"Invalid format for task_id {task_id}") - return self.set(name=name, uuid=uuid) diff --git a/task_processing/plugins/mesos/timeout_executor.py b/task_processing/plugins/mesos/timeout_executor.py deleted file mode 100644 index e7778ba9..00000000 --- a/task_processing/plugins/mesos/timeout_executor.py +++ /dev/null @@ -1,132 +0,0 @@ -import collections -import logging -import time -from queue import Queue -from threading import Lock -from threading import Thread - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - -TaskEntry = collections.namedtuple("TaskEntry", ["task_id", "deadline"]) - - -class TimeoutExecutor(TaskExecutor): - def __init__(self, downstream_executor): - self.downstream_executor = downstream_executor - - self.tasks_lock = Lock() - # Tasks that are pending termination - self.killed_tasks = [] - # Tasks that are currently running - self.running_tasks = [] - - self.src_queue = downstream_executor.get_event_queue() - self.dest_queue = Queue() - self.stopping = False - - self.timeout_thread = Thread(target=self.timeout_loop) - self.timeout_thread.daemon = True - self.timeout_thread.start() - - def timeout_loop(self): - while True: - # process downstream events - while not self.src_queue.empty(): - e = self.src_queue.get() - self.dest_queue.put(e) - - if not e.kind == "task": - continue - elif not e.terminal: - with self.tasks_lock: - if e.task_id not in [ - entry.task_id for entry in self.running_tasks - ]: - # No record of e's task_id in self.running_tasks, - # so we need to add it back in. We lack access to - # the original time the task was started, so to set - # a deadline, we use e's timestamp as a baseline. - new_entry = TaskEntry( - task_id=e.task_id, - deadline=e.task_config.timeout + e.timestamp, - ) - self._insert_new_running_task_entry(new_entry) - else: - # Update running and killed tasks - with self.tasks_lock: - for idx, entry in enumerate(self.running_tasks): - if e.task_id == entry.task_id: - self.running_tasks.pop(idx) - break - if e.task_id in self.killed_tasks: - self.killed_tasks.remove(e.task_id) - - # Check timeouts - current_time = time.time() - with self.tasks_lock: - delete_idx = None - for idx, entry in enumerate(self.running_tasks): - if entry.deadline < current_time: - log.info("Killing task {}: timed out".format(entry.task_id)) - self.downstream_executor.kill(entry.task_id) - self.killed_tasks.append(entry.task_id) - delete_idx = idx - # Skip the rest of tasks in the list because they are - # appended to the list later. 
- else: - break - if delete_idx is not None: - self.running_tasks = self.running_tasks[delete_idx + 1 :] - - if self.stopping: - return - - # Since src_queue has to be polled continuously, sleep(1) is used. - # Otherwise, a notify() from run() plus wait(delta between now and - # the earliest deadline) is more efficient. - time.sleep(1) - - def run(self, task_config): - # Tasks are dynamically added and removed from running_tasks and - # and killed_tasks. It's preferable for the client or execution - # framework to check for duplicated tasks. The duplicate task check does - # NOT happen here. - new_entry = TaskEntry( - task_id=task_config.task_id, deadline=task_config.timeout + time.time() - ) - with self.tasks_lock: - self._insert_new_running_task_entry(new_entry) - - self.downstream_executor.run(task_config) - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - with self.tasks_lock: - for idx, entry in enumerate(self.running_tasks): - if task_id == entry.task_id: - log.info("Killing task {}: requested".format(task_id)) - result = self.downstream_executor.kill(task_id) - if result is not False: - self.running_tasks.pop(idx) - self.killed_tasks.append(task_id) - return result - - def stop(self): - self.downstream_executor.stop() - self.stopping = True - self.timeout_thread.join() - - def get_event_queue(self): - return self.dest_queue - - def _insert_new_running_task_entry(self, new_entry): - # Insertion sort for task entries in self.running_tasks - for idx, entry in enumerate(self.running_tasks): - if new_entry.deadline <= entry.deadline: - self.running_tasks.insert(idx, new_entry) - return - self.running_tasks.append(new_entry) diff --git a/task_processing/plugins/mesos/translator.py b/task_processing/plugins/mesos/translator.py deleted file mode 100644 index 421ba9ab..00000000 --- a/task_processing/plugins/mesos/translator.py +++ /dev/null @@ -1,146 +0,0 @@ -import time -from typing import List - -import addict -from pyrsistent import thaw - -from task_processing.interfaces.event import Event -from task_processing.interfaces.event import task_event -from task_processing.plugins.mesos.task_config import MesosTaskConfig - -# https://github.com/apache/mesos/blob/master/include/mesos/mesos.proto - - -def make_mesos_container_info(task_config: MesosTaskConfig) -> addict.Dict: - container_info = addict.Dict( - type=task_config.containerizer, - volumes=thaw(task_config.volumes), - ) - port_mappings = [ - addict.Dict(host_port=task_config.ports[0]["begin"], container_port=8888) - ] - if container_info.type == "DOCKER": - container_info.docker = addict.Dict( - image=task_config.image, - network="BRIDGE", - port_mappings=port_mappings, - parameters=thaw(task_config.docker_parameters), - force_pull_image=(not task_config.use_cached_image), - ) - elif container_info.type == "MESOS": - container_info.network_infos = addict.Dict(port_mappings=port_mappings) - # For this to work, image_providers needs to be set to 'docker' on mesos agents (as opposed - # to 'appc' or 'oci'; we're still running docker images, we're just - # using the UCR to do it). 
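To make the comment above concrete: the only structural difference between the two containerizer branches is where the image description lands in the resulting ContainerInfo. A side-by-side sketch of the shapes this function builds, trimmed to the image-related fields (image name and values are illustrative):

    import addict

    # DOCKER containerizer: the Docker daemon runs the image directly.
    docker_style = addict.Dict(
        type="DOCKER",
        docker=addict.Dict(
            image="ubuntu:20.04",
            network="BRIDGE",
            force_pull_image=False,
        ),
    )

    # MESOS containerizer (UCR): Mesos unpacks the Docker image itself,
    # so the image is described under mesos.image instead.
    ucr_style = addict.Dict(
        type="MESOS",
        mesos=addict.Dict(
            image=addict.Dict(
                type="DOCKER",
                docker=addict.Dict(name="ubuntu:20.04"),
                cached=True,
            ),
        ),
    )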
- if "image" in task_config: - container_info.mesos.image = addict.Dict( - type="DOCKER", # not 'APPC' or 'OCI' - docker=addict.Dict(name=task_config.image), - cached=task_config.use_cached_image, - ) - return container_info - - -def make_mesos_resources( - task_config: MesosTaskConfig, - role: str, -) -> List[addict.Dict]: - return [ - addict.Dict( - name="cpus", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.cpus), - ), - addict.Dict( - name="mem", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.mem), - ), - addict.Dict( - name="disk", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.disk), - ), - addict.Dict( - name="gpus", - type="SCALAR", - role=role, - scalar=addict.Dict(value=task_config.gpus), - ), - addict.Dict( - name="ports", - type="RANGES", - role=role, - ranges=addict.Dict(range=thaw(task_config.ports)), - ), - ] - - -def make_mesos_command_info(task_config: MesosTaskConfig) -> addict.Dict: - return addict.Dict( - value=task_config.cmd, - uris=[addict.Dict(value=uri, extract=False) for uri in task_config.uris], - environment=make_task_environment_variables(task_config=task_config), - ) - - -def make_task_environment_variables(task_config: MesosTaskConfig) -> addict.Dict: - env = dict(task_config.environment.items()) - env["MESOS_TASK_ID"] = task_config.task_id # type: ignore - return addict.Dict(variables=[addict.Dict(name=k, value=v) for k, v in env.items()]) - - -def make_mesos_task_info( - task_config: MesosTaskConfig, - agent_id: str, - role: str, -) -> addict.Dict: - container_info = make_mesos_container_info(task_config) - resources = make_mesos_resources(task_config, role) - command_info = make_mesos_command_info(task_config) - - return addict.Dict( - task_id=addict.Dict(value=task_config.task_id), - agent_id=addict.Dict(value=agent_id), - name=f"executor-{task_config.task_id}", - resources=resources, - command=command_info, - container=container_info, - ) - - -MESOS_STATUS_MAP = { - "TASK_STARTING": addict.Dict(platform_type="starting", terminal=False), - "TASK_RUNNING": addict.Dict(platform_type="running", terminal=False), - "TASK_FINISHED": addict.Dict(platform_type="finished", terminal=True, success=True), - "TASK_FAILED": addict.Dict(platform_type="failed", terminal=True, success=False), - "TASK_KILLED": addict.Dict(platform_type="killed", terminal=True, success=False), - "TASK_LOST": addict.Dict(platform_type="lost", terminal=True, success=False), - "TASK_STAGING": addict.Dict(platform_type="staging", terminal=False), - "TASK_ERROR": addict.Dict(platform_type="error", terminal=True, success=False), - "TASK_KILLING": addict.Dict(platform_type="killing", terminal=False), - "TASK_DROPPED": addict.Dict(platform_type="dropped", terminal=True, success=False), - "TASK_UNREACHABLE": addict.Dict(platform_type="unreachable", terminal=False), - "TASK_GONE": addict.Dict(platform_type="gone", terminal=True, success=False), - "TASK_GONE_BY_OPERATOR": addict.Dict( - platform_type="gone_by_operator", terminal=True, success=False - ), - "TASK_UNKNOWN": addict.Dict(platform_type="unknown", terminal=False), - "TASK_STUCK": addict.Dict(platform_type="unknown", terminal=False), -} - - -def mesos_update_to_event( - mesos_status: addict.Dict, task_config: MesosTaskConfig -) -> Event: - kwargs = dict( - raw=mesos_status, - task_id=task_config.task_id, - task_config=task_config, - timestamp=time.time(), - ) - kwargs.update(MESOS_STATUS_MAP[mesos_status.state]) - return task_event(**kwargs) diff --git 
a/task_processing/plugins/persistence/__init__.py b/task_processing/plugins/persistence/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/task_processing/plugins/persistence/dynamodb_persistence.py b/task_processing/plugins/persistence/dynamodb_persistence.py deleted file mode 100644 index 0c84be52..00000000 --- a/task_processing/plugins/persistence/dynamodb_persistence.py +++ /dev/null @@ -1,72 +0,0 @@ -import decimal - -import boto3.session as bsession -from boto3.dynamodb.conditions import Key -from pyrsistent import thaw - -from task_processing.interfaces.persistence import Persister - - -class DynamoDBPersister(Persister): - def __init__(self, table_name, endpoint_url=None, session=None): - self.table_name = table_name - if not session: - session = bsession.Session() - self.ddb_client = session.client( - service_name="dynamodb", - endpoint_url=endpoint_url, - ) - self.table = session.resource( - endpoint_url=endpoint_url, service_name="dynamodb" - ).Table(table_name) - - def read(self, task_id, comparison_operator="EQ"): - res = self.table.query(KeyConditionExpression=Key("task_id").eq(task_id)) - return [self.item_to_event(item) for item in res["Items"]] - - def write(self, event): - if event.kind == "control": - return None - return self.ddb_client.put_item( - TableName=self.table_name, Item=self._event_to_item(event)["M"] - ) - - def _event_to_item(self, e): - raw = thaw(e) - if type(raw) is dict: - resp = {} - for k, v in raw.items(): - if type(v) is str: - resp[k] = {"S": v} - elif type(v) is bool: - resp[k] = {"BOOL": v} - elif isinstance(v, (int, float)): - resp[k] = {"N": str(v)} - elif type(v) is dict: - resp[k] = self._event_to_item(v) - elif type(v) is list: - if len(v) > 0: - vals = [] - for i in v: - vals.append(self._event_to_item(i)) - resp[k] = {"L": vals} - return {"M": resp} - elif type(raw) is str: - return {"S": raw} - elif type(raw) in [int, float]: - return {"N": str(raw)} - else: - print("Missed converting key %s type %s" % (raw, type(raw))) - - def item_to_event(self, obj): - return self._replace_decimals(obj) - - def _replace_decimals(self, obj): - if isinstance(obj, list): - return [self._replace_decimals(x) for x in obj] - elif isinstance(obj, dict): - return {k: self._replace_decimals(v) for k, v in obj.items()} - elif isinstance(obj, decimal.Decimal): - return float(obj) - else: - return obj diff --git a/task_processing/plugins/persistence/file_persistence.py b/task_processing/plugins/persistence/file_persistence.py deleted file mode 100644 index 30bd297b..00000000 --- a/task_processing/plugins/persistence/file_persistence.py +++ /dev/null @@ -1,27 +0,0 @@ -import json - -from pyrsistent import thaw -from pyrsistent import v - -from task_processing.interfaces.event import Event -from task_processing.interfaces.event import json_deserializer -from task_processing.interfaces.event import json_serializer -from task_processing.interfaces.persistence import Persister - - -class FilePersistence(Persister): - def __init__(self, output_file): - self.output_file = output_file - - def read(self, task_id): - acc = v() - with open(self.output_file, "r") as f: - for line in f: - parsed = json.loads(line, object_hook=json_deserializer) - if parsed["task_id"] == task_id: - acc = acc.append(Event.create(parsed)) - return acc - - def write(self, event): - with open(self.output_file, "a+") as f: - f.write("{}\n".format(json.dumps(thaw(event), default=json_serializer))) diff --git a/task_processing/plugins/stateful/__init__.py 
b/task_processing/plugins/stateful/__init__.py deleted file mode 100644 index 3b3ffbab..00000000 --- a/task_processing/plugins/stateful/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .stateful_executor import StatefulTaskExecutor - - -TASK_PROCESSING_PLUGIN = "stateful_plugin" - - -def register_plugin(registry): - return registry.register_task_executor("stateful", StatefulTaskExecutor) diff --git a/task_processing/plugins/stateful/stateful_executor.py b/task_processing/plugins/stateful/stateful_executor.py deleted file mode 100644 index 252fb09a..00000000 --- a/task_processing/plugins/stateful/stateful_executor.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging -import threading -import traceback -from queue import Queue - -from task_processing.interfaces.task_executor import TaskExecutor - -log = logging.getLogger(__name__) - - -class StatefulTaskExecutor(TaskExecutor): - """ """ - - def __init__(self, downstream_executor, persister): - self.downstream_executor = downstream_executor - self.writer_queue = Queue() - self.queue_for_processed_events = Queue() - self.persister = persister - worker_thread = threading.Thread(target=self.subscribe_to_updates_for_task) - worker_thread.daemon = True - worker_thread.start() - - def run(self, task_config): - self.downstream_executor.run(task_config) - - def reconcile(self, task_config): - self.downstream_executor.reconcile(task_config) - - def kill(self, task_id): - return self.downstream_executor.kill(task_id) - - def status(self, task_id): - return sorted(self.persister.read(task_id), key=lambda x: x["timestamp"]) - - def stop(self): - return self.downstream_executor.stop() - - def get_event_queue(self): - return self.queue_for_processed_events - - def subscribe_to_updates_for_task(self): - while True: - result = self.downstream_executor.get_event_queue().get() - try: - self.persister.write(event=result) - except Exception: - log.error(traceback.format_exc()) - self.queue_for_processed_events.put(result) - self.downstream_executor.get_event_queue().task_done() diff --git a/tests/integration/cluster b/tests/integration/cluster deleted file mode 120000 index c70625a2..00000000 --- a/tests/integration/cluster +++ /dev/null @@ -1 +0,0 @@ -../../examples/cluster \ No newline at end of file diff --git a/tests/integration/mesos/features/sync.feature b/tests/integration/mesos/features/sync.feature deleted file mode 100644 index 4c539496..00000000 --- a/tests/integration/mesos/features/sync.feature +++ /dev/null @@ -1,9 +0,0 @@ -Feature: Mesos Sync runner - -Scenario: Running single task - Given working mesos platform - And mesos executor with sync runner - When I launch a task - Then it should block until finished - And print status running - And print status finished diff --git a/tests/integration/mesos/mesos_test.py b/tests/integration/mesos/mesos_test.py deleted file mode 100644 index 6726bd19..00000000 --- a/tests/integration/mesos/mesos_test.py +++ /dev/null @@ -1,29 +0,0 @@ -from pytest_bdd import given -from pytest_bdd import then -from pytest_bdd import when - -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.runners.sync import Sync - - -@given("mesos executor with {runner} runner") -def mesos_executor_runner(runner): - executor = MesosExecutor(role="mock-role") - - if runner == "sync": - runner_instance = Sync(executor=executor) - else: - raise "unknown runner: {}".format(runner) - - return {"executor": executor, "runner": runner_instance} - - -@when("I launch a task") -def 
launch_task(mesos_executor_runner): - print(mesos_executor_runner) - return - - -@then("it should block until finished") -def block_until_finished(): - return diff --git a/tests/unit/plugins/mesos/conftest.py b/tests/unit/plugins/mesos/conftest.py deleted file mode 100644 index 7e6b0894..00000000 --- a/tests/unit/plugins/mesos/conftest.py +++ /dev/null @@ -1,86 +0,0 @@ -import addict -import mock -import pytest -from pyrsistent import m -from pyrsistent import v - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def fake_task(): - return MesosTaskConfig( - name="fake_name", - cpus=10.0, - mem=1024.0, - disk=1000.0, - gpus=1, - ports=v(m(begin=31200, end=31200)), - image="fake_image", - cmd='echo "fake"', - ) - - -@pytest.fixture -def fake_offer(): - return addict.Dict( - id=addict.Dict(value="fake_offer_id"), - agent_id=addict.Dict(value="fake_agent_id"), - hostname="fake_hostname", - resources=[ - addict.Dict( - role="fake_role", - name="cpus", - scalar=addict.Dict(value=10), - type="SCALAR", - ), - addict.Dict( - role="other_fake_role", - name="cpus", - scalar=addict.Dict(value=20), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="mem", - scalar=addict.Dict(value=1024), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="disk", - scalar=addict.Dict(value=1000), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="gpus", - scalar=addict.Dict(value=1), - type="SCALAR", - ), - addict.Dict( - role="fake_role", - name="ports", - ranges=addict.Dict(range=[addict.Dict(begin=31200, end=31500)]), - type="RANGES", - ), - ], - attributes=[ - addict.Dict(name="pool", text=addict.Dict(value="fake_pool_text")), - addict.Dict( - name="region", - text=addict.Dict(value="fake_region_text"), - ), - ], - ) - - -@pytest.fixture -def mock_fw_and_driver(): - with mock.patch( - "task_processing.plugins.mesos.mesos_executor.ExecutionFramework" - ) as mock_execution_framework, mock.patch( - "task_processing.plugins.mesos.mesos_executor.MesosSchedulerDriver" - ) as mock_scheduler_driver: - mock_execution_framework.return_value.framework_info = mock.Mock() - yield mock_execution_framework, mock_scheduler_driver diff --git a/tests/unit/plugins/mesos/constraints_test.py b/tests/unit/plugins/mesos/constraints_test.py deleted file mode 100644 index c7f73d54..00000000 --- a/tests/unit/plugins/mesos/constraints_test.py +++ /dev/null @@ -1,284 +0,0 @@ -import pytest -from pyrsistent import m - -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.constraints import Constraint - - -@pytest.fixture -def fake_dict(): - return m( - pool="fake_pool_text", - region="fake_region_text", - ) - - -def test_constraints_eq_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="==", - value="random_text", - ), - ], - ) - - -def test_constraints_eq_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="another_fake_region_text", - ), - ], - ) - - -def test_constraints_EQUALS_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="EQUALS", - value="fake_region_text", - ), - ], - ) - assert 
attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="EQUALS", - value="random_text", - ), - ], - ) - - -def test_constraints_EQUALS_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="EQUALS", - value="another_fake_region_text", - ), - ], - ) - - -def test_constraints_ne_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="!=", - value="another_fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="!=", - value="random_text", - ), - ], - ) - - -def test_constraints_ne_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="!=", - value="fake_region_text", - ), - ], - ) - - -def test_constraints_NOTEQUALS_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="NOTEQUALS", - value="another_fake_region_text", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="NOTEQUALS", - value="random_text", - ), - ], - ) - - -def test_constraints_NOTEQUALS_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="NOTEQUALS", - value="fake_region_text", - ), - ], - ) - - -def test_constraints_LIKE_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="LIKE", - value="fak.*t..t", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="LIKE", - value="random_text", - ), - ], - ) - - -def test_constraints_LIKE_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="LIKE", - value="another_fak.*t..t", - ), - ], - ) - assert not attributes_match_constraints( - fake_dict, - [Constraint(attribute="region", operator="LIKE", value="fake_region")], - ) - - -def test_constraints_UNLIKE_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="UNLIKE", - value="another_fak.*t..t", - ), - ], - ) - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="fake_attribute_name", - operator="UNLIKE", - value="random_text", - ), - ], - ) - - -def test_constraints_UNLIKE_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="UNLIKE", - value="fak.*t..t", - ), - ], - ) - - -def test_constraints_all_match(fake_dict): - assert attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), - Constraint( - attribute="pool", - operator="==", - value="fake_pool_text", - ), - ], - ) - - -def test_constraints_all_not_match(fake_dict): - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="another_fake_region_text", - ), - Constraint( - attribute="pool", - operator="==", - value="fake_pool_text", - ), - ], - ) - assert not attributes_match_constraints( - fake_dict, - [ - Constraint( - attribute="region", - operator="==", - value="fake_region_text", - ), 
- Constraint( - attribute="pool", - operator="==", - value="another_fake_pool_text", - ), - ], - ) diff --git a/tests/unit/plugins/mesos/execution_framework_test.py b/tests/unit/plugins/mesos/execution_framework_test.py deleted file mode 100644 index ec319daa..00000000 --- a/tests/unit/plugins/mesos/execution_framework_test.py +++ /dev/null @@ -1,568 +0,0 @@ -import socket -import time -from queue import Queue - -import mock -import pytest -from addict import Dict -from pyrsistent import m - -from task_processing.plugins.mesos import metrics -from task_processing.plugins.mesos.constraints import attributes_match_constraints -from task_processing.plugins.mesos.execution_framework import ExecutionFramework -from task_processing.plugins.mesos.execution_framework import TaskMetadata -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def ef(mock_Thread): - return ExecutionFramework("fake_name", "fake_role", mock.Mock(), 240) - - -@pytest.fixture -def mock_driver(): - with mock.patch("pymesos.MesosSchedulerDriver", autospec=True) as m: - m.id = "mock_driver" - yield m - - -@pytest.fixture -def mock_get_metric(): - with mock.patch( - "task_processing.plugins.mesos.execution_framework.get_metric", - ) as mock_get_metric: - yield mock_get_metric - - -@pytest.fixture -def mock_time(): - with mock.patch.object(time, "time") as mock_time: - yield mock_time - - -@pytest.fixture -def mock_sleep(ef): - def stop_killing(task_id): - ef.stopping = True - - with mock.patch.object(time, "sleep", side_effect=stop_killing) as mock_sleep: - yield mock_sleep - - -def test_ef_kills_stuck_tasks(ef, fake_task, mock_sleep, mock_get_metric): - task_id = fake_task.task_id - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="TASK_STAGING", - task_state_history=m(TASK_STAGING=0.0), - ) - ef.task_staging_timeout_s = 0 - ef.kill_task = mock.Mock() - ef.blacklist_slave = mock.Mock() - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.callbacks = MesosExecutorCallbacks(mock.Mock(), mock.Mock(), mock.Mock()) - - ef._background_check() - - assert ef.kill_task.call_count == 1 - assert ef.kill_task.call_args == mock.call(task_id) - assert ef.blacklist_slave.call_count == 1 - assert ef.blacklist_slave.call_args == mock.call( - agent_id="fake_agent_id", timeout=900 - ) - assert mock_get_metric.call_count == 2 - assert mock_get_metric.call_args_list == [ - mock.call(metrics.TASK_STUCK_COUNT), - mock.call(metrics.BGCHECK_TIME_TIMER), - ] - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_reenqueue_tasks_stuck_in_unknown_state( - ef, fake_task, mock_sleep, mock_get_metric -): - task_id = fake_task.task_id - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="UNKNOWN", - task_state_history=m(UNKNOWN=0.0), - ) - ef.task_staging_timeout_s = 0 - ef.kill_task = mock.Mock() - ef.blacklist_slave = mock.Mock() - ef.enqueue_task = mock.Mock() - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - - ef._background_check() - - assert ef.enqueue_task.call_count == 1 - assert ef.enqueue_task.call_args == mock.call(ef.task_metadata[task_id].task_config) - assert mock_get_metric.call_count == 2 - assert mock_get_metric.call_args_list == [ - mock.call(metrics.TASK_FAILED_TO_LAUNCH_COUNT), - 
mock.call(metrics.BGCHECK_TIME_TIMER), - ] - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_offer_matches_pool_no_pool(ef, fake_offer): - match, _ = ef.offer_matches_pool(fake_offer) - assert match - - -def test_offer_matches_pool_match(ef, fake_offer): - ef.pool = "fake_pool_text" - match, _ = ef.offer_matches_pool(fake_offer) - - assert match - - -def test_offer_matches_pool_no_match(ef, fake_offer): - ef.pool = "fake_other_pool_text" - match, _ = ef.offer_matches_pool(fake_offer) - - assert not match - - -def test_offer_matches_constraints_no_constraints(ef, fake_task, fake_offer): - attributes = { - attribute.name: attribute.value for attribute in fake_offer.attributes - } - match = attributes_match_constraints(attributes, fake_task.constraints) - assert match - - -def test_offer_matches_constraints_match(ef, fake_offer): - attributes = { - attribute.name: attribute.text.value for attribute in fake_offer.attributes - } - fake_task = MesosTaskConfig( - image="fake_image", - cmd='echo "fake"', - constraints=[ - ["region", "==", "fake_region_text"], - ], - ) - match = attributes_match_constraints(attributes, fake_task.constraints) - assert match - - -def test_offer_matches_constraints_no_match(ef, fake_offer): - attributes = { - attribute.name: attribute.text.value for attribute in fake_offer.attributes - } - fake_task = MesosTaskConfig( - image="fake_image", - cmd='echo "fake"', - constraints=[ - ["region", "==", "another_fake_region_text"], - ], - ) - match = attributes_match_constraints(attributes, fake_task.constraints) - assert not match - - -def test_kill_task(ef, mock_driver): - ef._driver = mock_driver - - ef.kill_task("fake_task_id") - - assert mock_driver.killTask.call_count == 1 - assert mock_driver.killTask.call_args == mock.call(Dict(value="fake_task_id")) - - -def test_kill_task_from_task_queue(ef, mock_driver): - ef.driver = mock_driver - ef.task_queue = Queue() - ef.task_queue.put(mock.Mock(task_id="fake_task_id")) - ef.task_queue.put(mock.Mock(task_id="fake_task_id1")) - - ef.kill_task("fake_task_id") - - assert mock_driver.killTask.call_count == 0 - assert ef.task_queue.qsize() == 1 - - -def test_blacklist_slave(ef, mock_get_metric, mock_time): - agent_id = "fake_agent_id" - mock_time.return_value = 2.0 - - ef.blacklisted_slaves = ef.blacklisted_slaves.append(agent_id) - ef.blacklist_slave(agent_id, timeout=2.0) - - assert agent_id in ef.blacklisted_slaves - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.BLACKLISTED_AGENTS_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_unblacklist_slave(ef, mock_time, mock_sleep): - agent_id = "fake_agent_id" - - ef.blacklisted_slaves = ef.blacklisted_slaves.append(agent_id) - ef.unblacklist_slave(agent_id, timeout=0.0) - - assert agent_id not in ef.blacklisted_slaves - - -def test_enqueue_task(ef, fake_task, mock_driver, mock_get_metric): - ef.are_offers_suppressed = True - ef._driver = mock_driver - - ef.enqueue_task(fake_task) - - assert ef.task_metadata[fake_task.task_id].task_state == "TASK_INITED" - assert not ef.task_queue.empty() - assert mock_driver.reviveOffers.call_count == 1 - assert not ef.are_offers_suppressed - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.TASK_ENQUEUED_COUNT) - assert mock_get_metric.return_value.count.call_count 
== 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - - -def test_stop(ef): - ef.stop() - - assert ef.stopping - - -def test_initialize_metrics(ef): - default_dimensions = {"framework_name": "fake_name", "framework_role": "fake_role"} - with mock.patch( - "task_processing.plugins.mesos.execution_framework.create_counter", - ) as mock_create_counter, mock.patch( - "task_processing.plugins.mesos.execution_framework.create_timer", - ) as mock_create_timer: - ef._initialize_metrics() - - counters = [ - metrics.TASK_LAUNCHED_COUNT, - metrics.TASK_FINISHED_COUNT, - metrics.TASK_FAILED_COUNT, - metrics.TASK_LAUNCH_FAILED_COUNT, - metrics.TASK_FAILED_TO_LAUNCH_COUNT, - metrics.TASK_KILLED_COUNT, - metrics.TASK_LOST_COUNT, - metrics.TASK_LOST_DUE_TO_INVALID_OFFER_COUNT, - metrics.TASK_ERROR_COUNT, - metrics.TASK_ENQUEUED_COUNT, - metrics.TASK_INSUFFICIENT_OFFER_COUNT, - metrics.TASK_STUCK_COUNT, - metrics.BLACKLISTED_AGENTS_COUNT, - metrics.TASK_OFFER_TIMEOUT, - ] - assert mock_create_counter.call_count == len(counters) - for cnt in counters: - mock_create_counter.assert_any_call(cnt, default_dimensions) - - timers = [ - metrics.TASK_QUEUED_TIME_TIMER, - metrics.OFFER_DELAY_TIMER, - metrics.BGCHECK_TIME_TIMER, - ] - assert mock_create_timer.call_count == len(timers) - for tmr in timers: - mock_create_timer.assert_any_call(tmr, default_dimensions) - - -def test_slave_lost(ef, mock_driver): - ef.slaveLost(mock_driver, "fake_slave_id") - - -def test_registered(ef, mock_driver): - ef.registered(mock_driver, Dict(value="fake_framework_id"), "fake_master_info") - - assert ef._driver == mock_driver - assert ef.event_queue.qsize() == 1 - - -def test_reregistered(ef, mock_driver): - ef.reregistered(mock_driver, "fake_master_info") - - -def test_resource_offers_launch( - ef, fake_task, fake_offer, mock_driver, mock_get_metric, mock_time -): - task_id = fake_task.task_id - ef.driver = mock_driver - ef._last_offer_time = 1.0 - mock_time.return_value = 2.0 - ef.suppress_after = 0.0 - ef.offer_matches_pool = mock.Mock(return_value=(True, None)) - task_metadata = TaskMetadata( - task_config=fake_task, - task_state="fake_state", - task_state_history=m(fake_state=time.time(), TASK_INITED=time.time()), - ) - fake_task_2 = mock.Mock() - ef.callbacks.get_tasks_for_offer = mock.Mock( - return_value=([fake_task], [fake_task_2]) - ) - - ef.task_queue.put(fake_task) - ef.task_queue.put(fake_task_2) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.resourceOffers(ef.driver, [fake_offer]) - - assert ef.task_metadata[task_id].agent_id == "fake_agent_id" - assert mock_driver.suppressOffers.call_count == 0 - assert not ef.are_offers_suppressed - assert mock_driver.declineOffer.call_count == 0 - assert mock_driver.launchTasks.call_count == 1 - assert mock_get_metric.call_count == 4 - mock_get_metric.assert_any_call(metrics.OFFER_DELAY_TIMER) - mock_get_metric.assert_any_call(metrics.TASK_LAUNCHED_COUNT) - mock_get_metric.assert_any_call(metrics.TASK_QUEUED_TIME_TIMER) - mock_get_metric.assert_any_call(metrics.TASK_INSUFFICIENT_OFFER_COUNT) - assert mock_get_metric.return_value.record.call_count == 2 - assert mock_get_metric.return_value.count.call_count == 2 - - -def test_resource_offers_launch_tasks_failed( - ef, fake_task, fake_offer, mock_driver, mock_get_metric, mock_time -): - task_id = fake_task.task_id - ef.driver = mock_driver - ef.driver.launchTasks = mock.Mock(side_effect=socket.timeout) - ef._last_offer_time = None - mock_time.return_value = 2.0 - ef.suppress_after = 0.0 - 
ef.offer_matches_pool = mock.Mock(return_value=(True, None)) - task_metadata = TaskMetadata( - task_config=fake_task, - task_state="fake_state", - task_state_history=m(fake_state=time.time(), TASK_INITED=time.time()), - ) - ef.callbacks.get_tasks_for_offer = mock.Mock(return_value=([fake_task], [])) - ef.task_queue.put(fake_task) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.resourceOffers(ef.driver, [fake_offer]) - - assert mock_driver.suppressOffers.call_count == 0 - assert not ef.are_offers_suppressed - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.launchTasks.call_count == 1 - assert mock_get_metric.call_count == 3 - assert ef.task_metadata[task_id].task_state == "UNKNOWN" - - -def test_resource_offers_no_tasks_to_launch( - ef, fake_offer, mock_driver, mock_get_metric -): - ef.suppress_after = 0.0 - - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.suppressOffers.call_count == 1 - assert ef.are_offers_suppressed - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_blacklisted_offer( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.blacklisted_slaves = ef.blacklisted_slaves.append( - fake_offer.agent_id.value, - ) - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_not_for_pool( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.offer_matches_pool = mock.Mock(return_value=(False, None)) - - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert ef.offer_matches_pool.call_count == 1 - assert ef.offer_matches_pool.call_args == mock.call(fake_offer) - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - - -def test_resource_offers_unmet_reqs( - ef, fake_task, fake_offer, mock_driver, mock_get_metric -): - ef.callbacks.get_tasks_for_offer = mock.Mock(return_value=([], [fake_task])) - - ef.task_queue.put(fake_task) - ef.resourceOffers(mock_driver, [fake_offer]) - - assert mock_driver.declineOffer.call_count == 1 - assert mock_driver.declineOffer.call_args == mock.call( - [fake_offer.id], ef.offer_decline_filter - ) - assert mock_driver.launchTasks.call_count == 0 - assert mock_get_metric.call_count == 1 - mock_get_metric.assert_any_call(metrics.TASK_INSUFFICIENT_OFFER_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - - -def status_update_test_prep(state, reason=""): - task = MesosTaskConfig(cmd="/bin/true", name="fake_name", image="fake_image") - task_id = task.task_id - update = Dict(task_id=Dict(value=task_id), state=state, reason=reason) - task_metadata = TaskMetadata( - task_config=task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ) - - return update, task_id, 
task_metadata - - -def test_status_update_record_only(ef, mock_driver): - update, task_id, task_metadata = status_update_test_prep("fake_state1") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.statusUpdate(mock_driver, update) - - assert ef.task_metadata[task_id].task_state == "fake_state1" - assert len(ef.task_metadata[task_id].task_state_history) == 2 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - assert mock_driver.acknowledgeStatusUpdate.call_args == mock.call(update) - - -def test_status_update_finished(ef, mock_driver, mock_get_metric): - # finished task does same thing as other states - update, task_id, task_metadata = status_update_test_prep("TASK_FINISHED") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef.statusUpdate(mock_driver, update) - - assert task_id not in ef.task_metadata - assert mock_get_metric.call_count == 1 - assert mock_get_metric.call_args == mock.call(metrics.TASK_FINISHED_COUNT) - assert mock_get_metric.return_value.count.call_count == 1 - assert mock_get_metric.return_value.count.call_args == mock.call(1) - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - assert mock_driver.acknowledgeStatusUpdate.call_args == mock.call(update) - - -def test_ignore_status_update(ef, mock_driver, mock_get_metric): - update, task_id, task_metadata = status_update_test_prep("TASK_FINISHED") - ef.translator = mock.Mock() - ef._driver = mock_driver - - ef.statusUpdate(mock_driver, update) - - assert task_id not in ef.task_metadata - assert mock_get_metric.call_count == 0 - assert mock_get_metric.return_value.count.call_count == 0 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - - -def test_task_lost_due_to_invalid_offers(ef, mock_driver, mock_get_metric): - update, task_id, task_metadata = status_update_test_prep( - state="TASK_LOST", reason="REASON_INVALID_OFFERS" - ) - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef._driver = mock_driver - - ef.statusUpdate(mock_driver, update) - - assert task_id in ef.task_metadata - assert mock_get_metric.call_count == 2 - assert ef.event_queue.qsize() == 0 - assert ef.task_queue.qsize() == 1 - assert mock_driver.acknowledgeStatusUpdate.call_count == 1 - - -def test_background_thread_removes_offer_timeout( - ef, - mock_driver, - fake_task, - mock_time, - mock_sleep, -): - mock_time.return_value = 2.0 - task_id = fake_task.task_id - fake_task = fake_task.set(offer_timeout=1) - task_metadata = TaskMetadata( - agent_id="fake_agent_id", - task_config=fake_task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=0.0), - ) - ef.driver = mock_driver - ef.task_metadata = ef.task_metadata.set(task_id, task_metadata) - ef._background_check() - assert ef.task_queue.empty() - assert task_id not in ef.task_metadata.keys() - assert not ef.event_queue.empty() - event = ef.event_queue.get(block=False) - assert event.terminal is True - assert event.success is False - assert event.task_id == task_id - - -def test_launch_tasks_for_offer_task_missing(ef, fake_task, fake_offer): - tasks_to_launch = [fake_task] - ef.launch_tasks_for_offer(fake_offer, tasks_to_launch) - - -def test_reconcile_task_unknown( - ef, - mock_driver, - fake_task, -): - ef._driver = mock_driver - ef._reconcile_tasks_at = 0 - assert fake_task.task_id not in ef.task_metadata - - ef.reconcile_task(fake_task) - assert fake_task.task_id in ef.task_metadata - assert 
mock_driver.reconcileTasks.call_count == 1 - - -def test_reconcile_task_existing( - ef, - mock_driver, - fake_task, -): - ef._driver = mock_driver - ef._reconcile_tasks_at = 0 - ef.task_metadata = ef.task_metadata.set( - fake_task.task_id, - TaskMetadata( - task_config=fake_task, - task_state="TASK_INITED", - task_state_history=m(TASK_INITED=time.time()), - ), - ) - - ef.reconcile_task(fake_task) - task_metadata = ef.task_metadata[fake_task.task_id] - assert len(task_metadata.task_state_history) == 2 - assert mock_driver.reconcileTasks.call_count == 1 diff --git a/tests/unit/plugins/mesos/logging_executor_test.py b/tests/unit/plugins/mesos/logging_executor_test.py deleted file mode 100644 index 69f549e2..00000000 --- a/tests/unit/plugins/mesos/logging_executor_test.py +++ /dev/null @@ -1,185 +0,0 @@ -from queue import Queue - -import mock -import pytest -from addict import Dict - -from task_processing.plugins.mesos.logging_executor import MesosLoggingExecutor -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.logging_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_logging_executor(mock_Thread, mock_downstream): - return MesosLoggingExecutor(downstream_executor=mock_downstream) - - -def test_run(mock_logging_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake", cmd="cat") - mock_logging_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - -def test_kill(mock_logging_executor, mock_downstream): - result = mock_logging_executor.kill("task") - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - - -def test_reconcile(mock_logging_executor, mock_downstream): - mock_logging_executor.reconcile("task") - assert mock_downstream.reconcile.call_args == mock.call("task") - - -def test_stop(mock_logging_executor, mock_downstream): - mock_logging_executor.stop() - assert mock_downstream.stop.call_args == mock.call() - assert mock_logging_executor.stopping - - -def test_event_loop_stores_staging_event(mock_logging_executor, source_queue): - raw = Dict( - { - "offer": { - "url": { - "scheme": "http", - "address": { - "ip": "1.2.3.4", - "port": 5051, - }, - }, - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="staging", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - - mock_logging_executor.event_loop() - task_data = mock_logging_executor.staging_tasks["my_task"] - assert task_data == "http://1.2.3.4:5051" - - -def test_event_loop_stores_staging_event_with_bogus_url( - mock_logging_executor, source_queue -): - raw = Dict( - { - "offer": { - "url": { - "scheme": None, - "address": {}, - }, - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="staging", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - - mock_logging_executor.event_loop() - task_data = mock_logging_executor.staging_tasks["my_task"] - assert task_data is None - - -def test_event_loop_continues_after_unknown_task(mock_logging_executor, source_queue): - unknown_event = mock.Mock( - kind="task", - platform_type="running", - task_id="new_task", - ) - 
other_event = mock.Mock( - kind="task", - platform_type="something", - task_id="other_task", - ) - - mock_logging_executor.stopping = True - source_queue.put(unknown_event) - source_queue.put(other_event) - - mock_logging_executor.event_loop() - - dest_queue = mock_logging_executor.get_event_queue() - assert dest_queue.get() == unknown_event - assert dest_queue.get() == other_event - - -def test_event_loop_running_event(mock_logging_executor, source_queue): - raw = Dict( - { - "container_status": { - "container_id": { - "value": "cid", - }, - }, - "executor_id": { - "value": "eid", - }, - } - ) - mock_event = mock.Mock( - kind="task", - platform_type="running", - task_id="my_task", - raw=raw, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - mock_logging_executor.staging_tasks = mock_logging_executor.staging_tasks.set( - "my_task", "my_log_url" - ) - - mock_logging_executor.event_loop() - assert "my_task" in mock_logging_executor.running_tasks - assert "my_task" not in mock_logging_executor.staging_tasks - - -def test_event_loop_terminal_event(mock_logging_executor, source_queue): - mock_event = mock.Mock( - kind="task", - platform_type="finished", - task_id="my_task", - terminal=True, - ) - - mock_logging_executor.stopping = True - source_queue.put(mock_event) - mock_logging_executor.running_tasks = mock_logging_executor.running_tasks.set( - "my_task", mock.Mock() - ) - - mock_logging_executor.event_loop() - - assert "my_task" in mock_logging_executor.running_tasks - assert "my_task" in mock_logging_executor.done_tasks diff --git a/tests/unit/plugins/mesos/mesos_executor_test.py b/tests/unit/plugins/mesos/mesos_executor_test.py deleted file mode 100644 index fea3b3cc..00000000 --- a/tests/unit/plugins/mesos/mesos_executor_test.py +++ /dev/null @@ -1,90 +0,0 @@ -import mock -import pytest - -from task_processing.plugins.mesos.mesos_executor import MesosExecutor -from task_processing.plugins.mesos.mesos_executor import MesosExecutorCallbacks - - -@pytest.fixture -def mock_callbacks(): - return (MesosExecutorCallbacks(mock.Mock(), mock.Mock(), mock.Mock()),) - - -@pytest.fixture -def mesos_executor(request, mock_callbacks, mock_Thread, mock_fw_and_driver): - dummy_executor = MesosExecutor("role", callbacks=mock_callbacks) - - def mesos_executor_teardown(): - dummy_executor.stop() - - request.addfinalizer(mesos_executor_teardown) - - return dummy_executor - - -def test_creates_execution_framework_and_driver( - mock_callbacks, - mock_Thread, - mesos_executor, - mock_fw_and_driver, -): - execution_framework, mesos_driver = mock_fw_and_driver - assert mesos_executor.execution_framework is execution_framework.return_value - assert execution_framework.call_args == mock.call( - name="taskproc-default", - task_staging_timeout_s=240, - initial_decline_delay=1.0, - pool=None, - role="role", - callbacks=mock_callbacks, - framework_id=None, - ) - - assert mesos_executor.driver is mesos_driver.return_value - assert mesos_driver.call_args == mock.call( - sched=execution_framework.return_value, - framework=execution_framework.return_value.framework_info, - use_addict=True, - master_uri="127.0.0.1:5050", - implicit_acknowledgements=False, - principal="taskproc", - secret=None, - failover=False, - ) - - assert mock_Thread.call_args == mock.call( - target=mesos_executor._run_driver, args=() - ) - - -def test_run_passes_task_to_execution_framework(mesos_executor): - mesos_executor.run("task") - assert mesos_executor.execution_framework.enqueue_task.call_args == mock.call( - "task" 
- ) - - -def test_stop_shuts_down_properly(mesos_executor): - mesos_executor.stop() - assert mesos_executor.execution_framework.stop.call_count == 1 - assert mesos_executor.driver.stop.call_count == 1 - assert mesos_executor.driver.join.call_count == 1 - - -def test_event_queue(mocker, mesos_executor): - q = mocker.Mock() - mesos_executor.execution_framework.event_queue = q - assert mesos_executor.get_event_queue() is q - - -def test_kill_returns(mesos_executor): - result = mesos_executor.kill("task") - assert result == mesos_executor.execution_framework.kill_task.return_value - assert mesos_executor.execution_framework.kill_task.call_args == mock.call("task") - - -def test_reconcile(mesos_executor): - mesos_executor.reconcile("task") - assert mesos_executor.execution_framework.reconcile_task.call_args == mock.call( - "task" - ) diff --git a/tests/unit/plugins/mesos/mesos_task_config_test.py b/tests/unit/plugins/mesos/mesos_task_config_test.py deleted file mode 100644 index 1514bb11..00000000 --- a/tests/unit/plugins/mesos/mesos_task_config_test.py +++ /dev/null @@ -1,35 +0,0 @@ -from pyrsistent import InvariantException - -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -def test_mesos_task_config_factories(): - m = MesosTaskConfig( - cmd="/bin/true", cpus=1, mem=64, disk=15, gpus=6.0, image="fake_image" - ) - - assert type(m.cpus) is float - assert m.cpus == 1.0 - - assert type(m.mem) is float - assert m.mem == 64.0 - - assert type(m.disk) is float - assert m.disk == 15.0 - - assert type(m.gpus) is int - assert m.gpus == 6 - - try: - m = m.set(name="a" * 256) - assert False, "Task id longer than 255 characters was accepted" - except InvariantException as e: - print(e) - assert True - - -def test_mesos_task_config_set_task_id(): - m = MesosTaskConfig(cmd="/bin/true", image="fake") - new_task_id = "new" + m.task_id - result = m.set_task_id(new_task_id) - assert result.task_id == new_task_id diff --git a/tests/unit/plugins/mesos/mesos_task_executor_test.py b/tests/unit/plugins/mesos/mesos_task_executor_test.py deleted file mode 100644 index e6b89a7f..00000000 --- a/tests/unit/plugins/mesos/mesos_task_executor_test.py +++ /dev/null @@ -1,48 +0,0 @@ -import mock -import pytest - -from task_processing.plugins.mesos.mesos_task_executor import get_tasks_for_offer - - -@pytest.fixture -def resource_patches(): - with mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.task_fits", - ) as mock_fits, mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.attributes_match_constraints", - ) as mock_constraints, mock.patch( - "task_processing.plugins.mesos.mesos_task_executor.allocate_task_resources", - ) as mock_allocate: - yield mock_fits, mock_constraints, mock_allocate - - -@pytest.mark.parametrize("fits,constraints", [(False, True), (True, False)]) -def test_get_tasks_for_offer_doesnt_fit(resource_patches, fits, constraints): - mock_fits, mock_constraints, mock_allocate = resource_patches - mock_fits.return_value = fits - mock_constraints.return_value = constraints - tasks_to_launch, tasks_to_defer = get_tasks_for_offer( - [mock.Mock()], - mock.Mock(), - mock.Mock(), - "role", - ) - - assert mock_allocate.call_count == 0 - assert len(tasks_to_launch) == 0 - assert len(tasks_to_defer) == 1 - - -def test_get_tasks_for_offer(resource_patches): - _, _, mock_allocate = resource_patches - mock_allocate.return_value = mock.Mock(), [] - tasks_to_launch, tasks_to_defer = get_tasks_for_offer( - [mock.Mock()], - mock.Mock(), - mock.Mock(), - "role", - ) - - 
assert mock_allocate.call_count == 1 - assert len(tasks_to_launch) == 1 - assert len(tasks_to_defer) == 0 diff --git a/tests/unit/plugins/mesos/resource_helpers_test.py b/tests/unit/plugins/mesos/resource_helpers_test.py deleted file mode 100644 index d8038b73..00000000 --- a/tests/unit/plugins/mesos/resource_helpers_test.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest -from pyrsistent import m -from pyrsistent import v - -from task_processing.plugins.mesos.resource_helpers import allocate_task_resources -from task_processing.plugins.mesos.resource_helpers import get_offer_resources -from task_processing.plugins.mesos.resource_helpers import ResourceSet -from task_processing.plugins.mesos.resource_helpers import task_fits - - -@pytest.fixture -def offer_resources(): - return ResourceSet( - cpus=10, - mem=1024, - disk=1000, - gpus=1, - ) - - -@pytest.mark.parametrize("role", ["fake_role", "none"]) -def test_get_offer_resources(fake_offer, role): - assert get_offer_resources(fake_offer, role) == ResourceSet( - cpus=10 if role != "none" else 0, - mem=1024 if role != "none" else 0, - disk=1000 if role != "none" else 0, - gpus=1 if role != "none" else 0, - ports=v(m(begin=31200, end=31500)) if role != "none" else v(), - ) - - -@pytest.mark.parametrize( - "available_ports", - [ - v(m(begin=5, end=10)), - v(m(begin=3, end=3), m(begin=6, end=10)), - ], -) -def test_allocate_task_resources(fake_task, offer_resources, available_ports): - offer_resources = offer_resources.set("ports", available_ports) - expected_port = available_ports[0].begin - consumed, remaining = allocate_task_resources(fake_task, offer_resources) - assert consumed == fake_task.set(ports=v(m(begin=expected_port, end=expected_port))) - assert remaining == { - "cpus": 0, - "mem": 0, - "disk": 0, - "gpus": 0, - "ports": v(m(begin=6, end=10)), - } - - -@pytest.mark.parametrize( - "cpus,available_ports", - [ - (5, v([m(begin=5, end=10)])), - (10, v()), - (10, v([m(begin=5, end=10)])), - ], -) -def test_task_fits(fake_task, offer_resources, cpus, available_ports): - offer_resources = offer_resources.set("cpus", cpus) - offer_resources = offer_resources.set("ports", available_ports) - assert task_fits(fake_task, offer_resources) == ( - cpus == 10 and len(available_ports) > 0 - ) diff --git a/tests/unit/plugins/mesos/retrying_executor_test.py b/tests/unit/plugins/mesos/retrying_executor_test.py deleted file mode 100644 index 56e81eee..00000000 --- a/tests/unit/plugins/mesos/retrying_executor_test.py +++ /dev/null @@ -1,287 +0,0 @@ -from queue import Queue - -import mock -import pytest - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.retrying_executor import RetryingExecutor -from task_processing.plugins.mesos.task_config import MesosTaskConfig - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.retrying_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_retrying_executor(mock_Thread, mock_downstream): - return RetryingExecutor( - downstream_executor=mock_downstream, - retries=2, - ) - - -@pytest.fixture -def mock_task_config(): - return MesosTaskConfig( - uuid="mock_uuid", - name="mock_name", - image="mock_image", - cmd="mock_cmd", - retries=5, - ) - - -@pytest.fixture -def mock_event(mock_task_config, is_terminal=False): - return Event( 
- kind="task", - timestamp=1234.5678, - terminal=is_terminal, - success=False, - task_id=mock_task_config.task_id, - platform_type="mesos", - message="mock_message", - task_config=mock_task_config, - raw="raw_event", - ) - - -# task_retry ############################################################# -def test_task_retry(mock_retrying_executor, mock_event): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 3 - ) - mock_retrying_executor.run = mock.Mock() - - mock_retrying_executor.retry(mock_event) - - assert mock_retrying_executor.task_retries[mock_event.task_id] == 2 - assert mock_retrying_executor.run.call_count == 1 - - -def test_task_retry_retries_exhausted(mock_retrying_executor, mock_event): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 0 - ) - mock_retrying_executor.run = mock.Mock() - - retry_attempted = mock_retrying_executor.retry(mock_event) - - assert mock_retrying_executor.task_retries[mock_event.task_id] == 0 - assert mock_retrying_executor.run.call_count == 0 - assert not retry_attempted - - -# retry_loop ############################################################# -def test_retry_loop_retries_task(mock_retrying_executor, mock_event): - mock_event = mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor._restore_task_id = mock.Mock(return_value=mock_event) - mock_retrying_executor.retry = mock.Mock(return_value=True) - mock_retrying_executor.retry_pred = mock.Mock(return_value=True) - mock_retrying_executor.src_queue.put(mock_event) - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 0 - assert mock_retrying_executor.retry.call_count == 1 - - -def test_retry_loop_does_not_retry_task(mock_retrying_executor, mock_event): - mock_event = mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor.retry = mock.Mock(return_value=False) - mock_retrying_executor.retry_pred = mock.Mock(return_value=False) - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - modified_task_id = mock_event.task_id + "-retry1" - modified_mock_event = mock_event.set("task_id", modified_task_id) - mock_retrying_executor.src_queue = Queue() - mock_retrying_executor.src_queue.put(modified_mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 1 - assert len(mock_retrying_executor.task_retries) == 0 - - -def test_retry_loop_filters_out_non_task(mock_retrying_executor): - mock_event = Event( - kind="control", raw="some message", message="stop", terminal=True - ) - - mock_retrying_executor.stopping = True - mock_retrying_executor._is_current_attempt = mock.Mock(return_value=True) - mock_retrying_executor.event_with_retries = mock.Mock() - mock_retrying_executor.src_queue.put(mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 1 - - -# If retrying_executor receives an event about an attempt for a task the -# executor does not know about, it should add the task into task_retries -# and assume the event's attempt is the current attempt -def 
test_retry_loop_recover_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - modified_mock_event = mock_event.set("task_id", original_task_id + "-retry6") - modified_mock_event = modified_mock_event.set("terminal", True) - mock_retrying_executor.stopping = True - mock_retrying_executor.retry = mock.Mock(return_value=True) - mock_retrying_executor.retry_pred = mock.Mock(return_value=True) - mock_retrying_executor.src_queue.put(modified_mock_event) - - mock_retrying_executor.retry_loop() - - assert mock_retrying_executor.dest_queue.qsize() == 0 - assert mock_retrying_executor.retry.call_count == 1 - assert mock_retrying_executor.task_retries[original_task_id] == 6 - - -# run #################################################################### -def test_run(mock_retrying_executor, mock_downstream, mock_task_config): - mock_retrying_executor.run(mock_task_config) - - assert mock_downstream.run.call_count == 1 - assert mock_retrying_executor.task_retries[mock_task_config.task_id] == 5 - - # Config should be the same, except with retry number appended - config_with_retry = mock_downstream.run.call_args[0][0] - assert config_with_retry.task_id == mock_task_config.task_id + "-retry5" - assert config_with_retry.cmd == mock_task_config.cmd - assert config_with_retry.image == mock_task_config.image - - -def test_run_default_retries(mock_retrying_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake_image", cmd="some command") - mock_retrying_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - assert mock_retrying_executor.task_retries[mock_config.task_id] == 2 - - -# reconcile ############################################################## -def test_reconcile(mock_retrying_executor, mock_downstream): - mock_retrying_executor.reconcile("task") - - assert mock_downstream.reconcile.call_args == mock.call("task") - - -# kill ################################################################### -def test_kill(mock_retrying_executor, mock_downstream): - result = mock_retrying_executor.kill("task") - - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - assert mock_retrying_executor.task_retries["task"] == -1 - - -# stop ################################################################### -def test_stop(mock_retrying_executor, mock_downstream): - mock_retrying_executor.stop() - - assert mock_downstream.stop.call_args == mock.call() - assert mock_retrying_executor.stopping is True - - -# _task_config_with_retry ################################################ -def test_task_config_with_retry(mock_retrying_executor, mock_task_config): - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_task_config.task_id, 2 - ) - - ret_value = mock_retrying_executor._task_config_with_retry(mock_task_config) - - assert ret_value.task_id == mock_task_config.task_id + "-retry2" - - -# _restore_task_id ####################################################### -def test_restore_task_id(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 1 - ) - modified_task_config = mock_event.task_config.set( - "uuid", str(mock_event.task_config.uuid) + "-retry1" - ) - mock_event = mock_event.set("task_config", modified_task_config) - - ret_value = mock_retrying_executor._restore_task_id(mock_event, original_task_id) - - assert mock_event.task_id == 
ret_value.task_id - - -# _is_current_attempt #################################################### -def test_is_current_attempt( - mock_retrying_executor, - mock_event, - mock_task_config, -): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 2 - ) - modified_task_id = str(mock_event.task_config.uuid) + "-retry2" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, original_task_id - ) - - assert ret_value is True - - -def test_is_not_current_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - mock_retrying_executor.task_retries = mock_retrying_executor.task_retries.set( - mock_event.task_id, 2 - ) - modified_task_id = str(mock_event.task_config.uuid) + "-retry1" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, original_task_id - ) - - assert ret_value is False - - -def test_is_unknown_attempt(mock_retrying_executor, mock_event): - original_task_id = mock_event.task_id - modified_task_id = str(mock_event.task_config.uuid) + "-retry8" - modified_task_config = mock_event.task_config.set("uuid", modified_task_id) - modified_mock_event = mock_event.set("task_config", modified_task_config) - modified_mock_event = mock_event.set("task_id", modified_task_id) - - ret_value = mock_retrying_executor._is_current_attempt( - modified_mock_event, - original_task_id, - ) - - assert ret_value is True - assert mock_retrying_executor.task_retries.get(original_task_id) == 8 diff --git a/tests/unit/plugins/mesos/timeout_executor_test.py b/tests/unit/plugins/mesos/timeout_executor_test.py deleted file mode 100644 index 2c787e8e..00000000 --- a/tests/unit/plugins/mesos/timeout_executor_test.py +++ /dev/null @@ -1,209 +0,0 @@ -from queue import Queue - -import mock -import pytest - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.task_config import MesosTaskConfig -from task_processing.plugins.mesos.timeout_executor import TaskEntry -from task_processing.plugins.mesos.timeout_executor import TimeoutExecutor - - -@pytest.fixture -def mock_Thread(): - with mock.patch("task_processing.plugins.mesos.timeout_executor.Thread"): - yield - - -@pytest.fixture -def source_queue(): - return Queue() - - -@pytest.fixture -def mock_downstream(source_queue): - executor = mock.MagicMock() - executor.get_event_queue.return_value = source_queue - return executor - - -@pytest.fixture -def mock_timeout_executor(mock_Thread, mock_downstream): - return TimeoutExecutor(downstream_executor=mock_downstream) - - -@pytest.fixture -def mock_task_config(): - return MesosTaskConfig( - uuid="mock_uuid", - name="mock_name", - image="mock_image", - cmd="mock_cmd", - timeout=1000, - ) - - -@pytest.fixture -def mock_entry(mock_task_config): - return TaskEntry( - task_id=mock_task_config.task_id, - deadline=mock_task_config.timeout + 2000, - ) - - -@pytest.fixture -def mock_event(mock_task_config): - return Event( - kind="task", - timestamp=1234.5678, - terminal=True, - 
task_id=mock_task_config.task_id, - platform_type="mesos", - message="mock_message", - task_config=mock_task_config, - raw="raw_event", - ) - - -# timeout_loop ########################################################### -def test_timeout_loop_nontask( - mock_timeout_executor, - mock_event, -): - mock_event = mock_event.set("kind", "control") - mock_entry = TaskEntry("different_id", deadline=1234) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - - with mock.patch("time.time", mock.Mock(return_value=0)): - mock_timeout_executor.timeout_loop() - - assert len(mock_timeout_executor.running_tasks) == 1 - - -def test_timeout_loop_terminal_task_timed_out( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - mock_timeout_executor.killed_tasks.append(mock_entry.task_id) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_count == 0 - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 0 - - -def test_timeout_loop_existing_nonterminal_task( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_event = mock_event.set("terminal", False) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.running_tasks.append(mock_entry) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - with mock.patch("time.time", mock.Mock(return_value=10000)): - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_args == mock.call( - mock_entry.task_id - ) - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -def test_timeout_loop_nonexistent_nonterminal_task( - mock_timeout_executor, - mock_event, - mock_entry, -): - mock_event = mock_event.set("terminal", False) - mock_timeout_executor.stopping = True - mock_timeout_executor.src_queue.put(mock_event) - mock_timeout_executor.downstream_executor.kill = mock.Mock() - - with mock.patch("time.time", mock.Mock(return_value=10000)): - mock_timeout_executor.timeout_loop() - - assert mock_timeout_executor.downstream_executor.kill.call_args == mock.call( - mock_entry.task_id - ) - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -# run #################################################################### -def test_run(mock_timeout_executor, mock_downstream): - mock_config = MesosTaskConfig(image="fake", cmd="cat", timeout=60) - mock_timeout_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - assert len(mock_timeout_executor.running_tasks) == 1 - - -# reconcile ############################################################## -def test_reconcile(mock_timeout_executor, mock_downstream): - mock_timeout_executor.reconcile("task") - assert mock_downstream.reconcile.call_args == mock.call("task") - - -# kill ################################################################### -def test_kill_existing_task(mock_timeout_executor, mock_downstream): - mock_timeout_executor.running_tasks = [TaskEntry("task", 10)] - mock_timeout_executor.downstream_executor.kill = mock.Mock(return_value=True) - - result = 
mock_timeout_executor.kill("task") - - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - assert len(mock_timeout_executor.running_tasks) == 0 - assert len(mock_timeout_executor.killed_tasks) == 1 - - -# stop ################################################################### -def test_stop(mock_timeout_executor, mock_downstream): - mock_timeout_executor.stop() - assert mock_downstream.stop.call_args == mock.call() - assert mock_timeout_executor.stopping - - -# _insert_new_running_task_entry ######################################### -def test_insert_new_running_task_entry_enumerate(mock_timeout_executor): - mock_entry_one = TaskEntry("fake_entry_one", 1) - mock_entry_two = TaskEntry("fake_entry_two", 2) - mock_entry_three = TaskEntry("fake_entry_three", 3) - mock_timeout_executor.running_tasks.append(mock_entry_one) - mock_timeout_executor.running_tasks.append(mock_entry_three) - - mock_timeout_executor._insert_new_running_task_entry(mock_entry_two) - - assert [entry.deadline for entry in mock_timeout_executor.running_tasks] == [ - 1, - 2, - 3, - ] - - -def test_insert_new_running_task_entry_append(mock_timeout_executor): - mock_entry_one = TaskEntry("fake_entry_one", 1) - mock_entry_two = TaskEntry("fake_entry_two", 2) - mock_entry_three = TaskEntry("fake_entry_three", 3) - mock_timeout_executor.running_tasks.append(mock_entry_one) - mock_timeout_executor.running_tasks.append(mock_entry_two) - - mock_timeout_executor._insert_new_running_task_entry(mock_entry_three) - - assert [entry.deadline for entry in mock_timeout_executor.running_tasks] == [ - 1, - 2, - 3, - ] diff --git a/tests/unit/plugins/mesos/translator_test.py b/tests/unit/plugins/mesos/translator_test.py deleted file mode 100644 index 6dc44fa3..00000000 --- a/tests/unit/plugins/mesos/translator_test.py +++ /dev/null @@ -1,151 +0,0 @@ -import addict -import mock -import pytest -from pyrsistent import v - -from task_processing.interfaces.event import Event -from task_processing.plugins.mesos.translator import make_mesos_task_info -from task_processing.plugins.mesos.translator import MESOS_STATUS_MAP -from task_processing.plugins.mesos.translator import mesos_update_to_event - - -@pytest.mark.parametrize( - "gpus_count,containerizer,container", - [ - ( - 1.0, - "MESOS", - addict.Dict( - type="MESOS", - volumes=[ - addict.Dict( - container_path="fake_container_path", - host_path="fake_host_path", - mode="RO", - ) - ], - mesos=addict.Dict( - image=addict.Dict( - type="DOCKER", - docker=addict.Dict(name="fake_image"), - cached=True, - ), - ), - network_infos=addict.Dict( - port_mappings=[addict.Dict(host_port=31200, container_port=8888)], - ), - ), - ), - ( - 0, - "DOCKER", - addict.Dict( - type="DOCKER", - volumes=[ - addict.Dict( - container_path="fake_container_path", - host_path="fake_host_path", - mode="RO", - ) - ], - docker=addict.Dict( - image="fake_image", - network="BRIDGE", - force_pull_image=False, - port_mappings=[addict.Dict(host_port=31200, container_port=8888)], - parameters=[], - ), - ), - ), - ], -) -def test_make_mesos_task_info( - fake_task, - fake_offer, - gpus_count, - containerizer, - container, -): - tid = fake_task.task_id - fake_task = fake_task.set( - volumes=v( - addict.Dict( - mode="RO", - container_path="fake_container_path", - host_path="fake_host_path", - ) - ), - gpus=gpus_count, - containerizer=containerizer, - ) - - task_info = make_mesos_task_info( - fake_task, - fake_offer.agent_id.value, - "fake_role", - ) - - expected_task_info = 
addict.Dict( - task_id=addict.Dict(value=tid), - agent_id=addict.Dict(value="fake_agent_id"), - name="executor-{id}".format(id=tid), - resources=[ - addict.Dict( - name="cpus", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=10.0), - ), - addict.Dict( - name="mem", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=1024.0), - ), - addict.Dict( - name="disk", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=1000.0), - ), - addict.Dict( - name="gpus", - type="SCALAR", - role="fake_role", - scalar=addict.Dict(value=gpus_count), - ), - addict.Dict( - name="ports", - type="RANGES", - role="fake_role", - ranges=addict.Dict(range=[addict.Dict(begin=31200, end=31200)]), - ), - ], - command=addict.Dict( - value='echo "fake"', - uris=[], - environment=addict.Dict( - variables=[{"name": "MESOS_TASK_ID", "value": mock.ANY}] - ), - ), - container=container, - ) - assert task_info == expected_task_info - - -@mock.patch("task_processing.plugins.mesos.translator.time") -def test_mesos_update_to_event(mock_time): - mock_time.time.return_value = 12345678.0 - for key, val in MESOS_STATUS_MAP.items(): - mesos_status = mock.MagicMock() - mesos_status.state = key - assert mesos_update_to_event(mesos_status, addict.Dict(task_id="123")) == Event( - kind="task", - raw=mesos_status, - task_id="123", - task_config={"task_id": "123"}, - timestamp=12345678.0, - terminal=val.terminal, - platform_type=val.platform_type, - success=val.get("success", None), - ) diff --git a/tests/unit/plugins/persistence/dynamo_persistence_test.py b/tests/unit/plugins/persistence/dynamo_persistence_test.py deleted file mode 100644 index dff31084..00000000 --- a/tests/unit/plugins/persistence/dynamo_persistence_test.py +++ /dev/null @@ -1,99 +0,0 @@ -import pytest -from hypothesis import given -from hypothesis import HealthCheck -from hypothesis import settings -from hypothesis import strategies as st - -from task_processing.interfaces.event import Event -from task_processing.plugins.persistence.dynamodb_persistence import DynamoDBPersister - - -@pytest.fixture -def persister(mocker): - mock_session = mocker.Mock() - mock_session.client.return_value = [] - - mock_resource = mocker.Mock() - mock_resource.Table.return_value = mocker.Mock() - mock_session.resource.return_value = mock_resource - persister = DynamoDBPersister(table_name="foo", session=mock_session) - return persister - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given( - x=st.dictionaries( - keys=st.text(), values=st.decimals(allow_nan=False, allow_infinity=False) - ) -) -def test_replaces_decimals_dict(x, persister): - for k, v in persister._replace_decimals(x).items(): - assert type(v) == float - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=st.decimals(allow_nan=False, allow_infinity=False)) -def test_replaces_decimals_decimal(x, persister): - assert type(persister._replace_decimals(x)) is float - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=st.lists(st.decimals(allow_nan=False, allow_infinity=False))) -def test_replaces_decimals_list(x, persister): - assert all([type(v) == float for v in persister._replace_decimals(x)]) - - -@settings(suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given( - x=st.one_of( - st.text(), - st.booleans(), - ) -) -def test_replaces_decimals_unaffected(x, persister): - assert persister._replace_decimals(x) == x - - -texts = st.text(max_size=5) -events = st.builds( - Event, - 
kind=st.sampled_from(["task", "control"]), - task_id=texts, - timestamp=st.floats(min_value=0, allow_nan=False, allow_infinity=False), - terminal=st.booleans(), - success=st.booleans(), - task_config=st.dictionaries( - max_size=5, - keys=texts, - values=st.lists( - st.one_of( - texts, - st.dictionaries(max_size=5, keys=texts, values=texts), - ), - max_size=5, - ), - ), - raw=st.sampled_from([None]), -) - - -@settings(max_examples=50, suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=events) -def test_event_to_item_timestamp(x, persister): - res = persister._event_to_item(x)["M"] - assert "N" in res["timestamp"].keys() - assert "BOOL" in res["success"].keys() - assert "BOOL" in res["terminal"].keys() - assert "M" in res["task_config"].keys() - - -@settings(max_examples=50, suppress_health_check=(HealthCheck.function_scoped_fixture,)) -@given(x=events) -def test_event_to_item_list(x, persister): - res = persister._event_to_item(x)["M"] - for k, v in x.task_config.items(): - if len(v) > 0: - assert k in res["task_config"]["M"] - else: - assert k not in res["task_config"]["M"] - assert all([{"S": val} in ["task_config"]["M"][k]["L"] for val in v]) diff --git a/tests/unit/plugins/stateful/stateful_executor_test.py b/tests/unit/plugins/stateful/stateful_executor_test.py deleted file mode 100644 index 355fd172..00000000 --- a/tests/unit/plugins/stateful/stateful_executor_test.py +++ /dev/null @@ -1,44 +0,0 @@ -import mock -import pytest - -from task_processing.plugins.stateful.stateful_executor import StatefulTaskExecutor - - -@pytest.fixture -def mock_downstream(): - return mock.MagicMock() - - -@pytest.fixture -def mock_persister(): - return mock.MagicMock() - - -@pytest.fixture -def mock_stateful_executor(mock_Thread, mock_downstream, mock_persister): - return StatefulTaskExecutor( - downstream_executor=mock_downstream, - persister=mock_persister, - ) - - -def test_run(mock_stateful_executor, mock_downstream): - mock_config = mock.MagicMock() - mock_stateful_executor.run(mock_config) - assert mock_downstream.run.call_count == 1 - - -def test_kill(mock_stateful_executor, mock_downstream): - result = mock_stateful_executor.kill("task") - assert result == mock_downstream.kill.return_value - assert mock_downstream.kill.call_args == mock.call("task") - - -def test_reconcile(mock_stateful_executor, mock_downstream): - mock_stateful_executor.reconcile("task") - assert mock_downstream.reconcile.call_args == mock.call("task") - - -def test_stop(mock_stateful_executor, mock_downstream): - mock_stateful_executor.stop() - assert mock_downstream.stop.call_args == mock.call() diff --git a/tox.ini b/tox.ini index ef766537..ac681e95 100644 --- a/tox.ini +++ b/tox.ini @@ -6,18 +6,13 @@ passenv = PIP_INDEX_URL deps = -rrequirements-dev.txt commands = - pip install -e .[mesos_executor,persistence,k8s] + pip install -e .[persistence,k8s] - pip install yelp-meteorite mypy task_processing pytest {posargs:tests}/unit pre-commit install -f --install-hooks pre-commit run --all-files -[testenv:mesos] -basepython = /usr/bin/python3.8 -commands = - pip install -e .[mesos_executor] - [testenv:docs] deps = sphinx commands = @@ -30,24 +25,11 @@ deps = twine commands = python setup.py sdist bdist_wheel -[testenv:integration] -deps = - docker-compose==1.7.1 -commands = - docker-compose -f examples/cluster/docker-compose.yaml down - docker-compose -f examples/cluster/docker-compose.yaml pull - docker-compose -f examples/cluster/docker-compose.yaml build - docker-compose -f 
examples/cluster/docker-compose.yaml \ - up -d zookeeper mesosmaster mesosagent - docker-compose -f examples/cluster/docker-compose.yaml scale mesosagent=1 - docker-compose -f examples/cluster/docker-compose.yaml \ - run playground /src/itest - [testenv:venv] basepython = /usr/bin/python3.8 envdir = venv commands = - pip install -e .[mesos_executor,metrics,persistence,k8s] + pip install -e .[metrics,persistence,k8s] [flake8] exclude = .git,__pycache__,.tox,docs,venv