From 25d822af1bbce77d0e1540fa585b9f2c6e41a0aa Mon Sep 17 00:00:00 2001
From: Michael Gummelt
Date: Tue, 27 Dec 2016 13:51:36 -0800
Subject: [PATCH] teragen hdfs integration test

---
 bin/test.sh   | 28 ++--------------------------
 tests/test.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/bin/test.sh b/bin/test.sh
index 31551b94d205a..6e9782de9f986 100755
--- a/bin/test.sh
+++ b/bin/test.sh
@@ -60,35 +60,11 @@ configure_cli() {
     fi
 }
 
-install_spark() {
-    notify_github pending "Installing Spark"
-
+setup_permissions() {
     if [ "$SECURITY" = "strict" ]; then
         # custom configuration to enable auth stuff:
         ${COMMONS_TOOLS_DIR}/setup_permissions.sh nobody "*" # spark's default service.role
-        echo '{ "service": { "user": "nobody", "principal": "service-acct", "secret_name": "secret" } }' > /tmp/spark.json
-        dcos --log-level=INFO package install spark --options=/tmp/spark.json --yes
-    else
-        dcos --log-level=INFO package install spark --yes
-    fi
-
-    if [ $? -ne 0 ]; then
-        notify_github failure "Spark install failed"
-        exit 1
     fi
-
-    SECONDS=0
-    while [[ $(dcos marathon app list --json | jq '.[] | select(.id=="/spark") | .tasksHealthy') -ne "1" ]]
-    do
-        sleep 5
-        if [ $SECONDS -gt 600 ]; then # 10 mins
-            notify_github failure "Spark install timed out"
-            exit 1
-        fi
-    done
-
-    # sleep 30s due to mesos-dns propagation delays to /service/sparkcli/
-    sleep 30
 }
 
 run_tests() {
@@ -113,7 +89,7 @@ fetch_commons_tools
 start_cluster
 # TODO: Migrate the following three commands to dcos-commons-tools/run-tests.py
 configure_cli
-install_spark
+setup_permissions
 run_tests
 
 notify_github success "Tests Passed"
diff --git a/tests/test.py b/tests/test.py
index bdef88eaa2dec..74df69618cac0 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -7,11 +7,64 @@
 from boto.s3.connection import S3Connection
 from boto.s3.key import Key
 
+import dcos.config
+import dcos.http
+import dcos.package
 import os
 import pytest
 import re
 import shakedown
 import subprocess
+import urllib
+
+
+def setup_module(module):
+    _require_package('hdfs')
+    _install_spark()
+
+
+def _install_spark():
+    options = {"hdfs":
+               {"config-url":
+                "http://hdfs.marathon.mesos:9000/v1/connection"}}
+
+    if os.environ.get('SECURITY') == 'strict':
+        options['service'] = {"user": "nobody",
+                              "principal": "service-acct",
+                              "secret_name": "secret"}
+
+    shakedown.install_package('spark', options_json=options, wait_for_completion=True)
+
+    def pred():
+        dcos_url = dcos.config.get_config_val("core.dcos_url")
+        spark_url = urllib.parse.urljoin(dcos_url, "/service/spark")
+        status_code = dcos.http.get(spark_url).status_code
+        return status_code == 200
+
+    shakedown.spinner.wait_for(pred)
+
+
+def _require_package(pkg_name):
+    pkg_manager = dcos.package.get_package_manager()
+    installed_pkgs = dcos.package.installed_packages(pkg_manager, None, None, False)
+    if not any(pkg['name'] == pkg_name for pkg in installed_pkgs):
+        shakedown.install_package(pkg_name, wait_for_completion=True)
+        shakedown.wait_for(_is_hdfs_ready, ignore_exceptions=False, timeout_seconds=600)
+
+
+DEFAULT_HDFS_TASK_COUNT = 8
+def _is_hdfs_ready(expected_tasks=DEFAULT_HDFS_TASK_COUNT):
+    running_tasks = [t for t in shakedown.get_service_tasks('hdfs')
+                     if t['state'] == 'TASK_RUNNING']
+    return len(running_tasks) >= expected_tasks
+
+
+def test_teragen():
+    jar_url = "https://downloads.mesosphere.io/spark/examples/spark-terasort-1.0-jar-with-dependencies_2.11.jar"
+    _run_tests(jar_url,
+               "1g hdfs:///terasort_in",
+               "Number of records written",
+               {"--class": "com.github.ehiggs.spark.terasort.TeraGen"})
 
 
 def test_jar():