Always use docker registry server's digest information for pushed spark images #3756

Merged
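
In brief: build_and_push_docker_image previously returned a plain tag reference, and paasta_spark_run then asked the local docker daemon for the image's RepoDigests in order to pin the executor image. With unprivileged docker, the locally recorded digest can differ from what the registry actually stored, so the command now parses the digest out of the registry's `docker push` response and returns a digest-pinned reference directly. A sketch of the before/after return value, reusing the hypothetical repository and digest from the diff's comments:

# Before: a mutable tag reference; the digest was looked up afterwards from the
# local daemon via inspect_image()["RepoDigests"], which may disagree with the
# registry under unprivileged docker.
old_return = "docker-dev.yelpcorp.com/paasta-spark-run-dpopes:latest"

# After: a digest-pinned reference built from the registry's push response, so
# executors pull exactly the bytes the registry acknowledged.
new_return = (
    "docker-dev.yelpcorp.com/paasta-spark-run-dpopes:latest"
    "@sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557"
)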
34 changes: 21 additions & 13 deletions paasta_tools/cli/cmds/spark_run.py
@@ -41,7 +41,6 @@
 from paasta_tools.spark_tools import inject_spark_conf_str
 from paasta_tools.utils import _run
 from paasta_tools.utils import DEFAULT_SOA_DIR
-from paasta_tools.utils import get_docker_client
 from paasta_tools.utils import get_possible_launched_by_user_variable_from_env
 from paasta_tools.utils import get_username
 from paasta_tools.utils import InstanceConfig
@@ -1086,11 +1085,28 @@ def build_and_push_docker_image(args: argparse.Namespace) -> Optional[str]:
     command = "docker push %s" % docker_url

     print(PaastaColors.grey(command))
-    retcode, _ = _run(command, stream=True)
+    retcode, output = _run(command, stream=False)
     if retcode != 0:
         return None

-    return docker_url
+    # With unprivileged docker, the digest on the remote registry may not match the digest
+    # in the local environment. Because of this, we have to parse the digest message from the
+    # server response and use it downstream when launching spark executors.
+
+    # Output from `docker push` with unprivileged docker looks like:
+    #   Using default tag: latest
+    #   The push refers to repository [docker-dev.yelpcorp.com/paasta-spark-run-dpopes:latest]
+    #   latest: digest: sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557
+
+    # With privileged docker, the last line has an extra "size: 123":
+    #   latest: digest: sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557 size: 52
+
+    digest_line = output.split("\n")[-1]
+    digest_match = re.match(r"[^:]*: [^:]*: (?P<digest>[^\s]*)", digest_line)
+    if not digest_match:
+        raise ValueError(f"Could not determine digest from output: {output}")
+    digest = digest_match.group("digest")
+    return f"{docker_url}@{digest}"


 def validate_work_dir(s):
@@ -1259,18 +1275,10 @@ def paasta_spark_run(args):
         assume_aws_role_arn=args.assume_aws_role,
         session_duration=args.aws_role_duration,
     )
-    docker_image = get_docker_image(args, instance_config)
-    if docker_image is None:
+    docker_image_digest = get_docker_image(args, instance_config)
+    if docker_image_digest is None:
         return 1

-    # Get image digest
-    docker_client = get_docker_client()
-    image_details = docker_client.inspect_image(docker_image)
-    if len(image_details["RepoDigests"]) < 1:
-        print("Failed to get docker image digest", file=sys.stderr)
-        return None
-    docker_image_digest = image_details["RepoDigests"][0]
-
     pod_template_path = generate_pod_template_path()
     args.enable_compact_bin_packing = should_enable_compact_bin_packing(
         args.disable_compact_bin_packing, args.cluster_manager
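
The digest parsing above is easy to sanity-check in isolation. A minimal standalone sketch of the same logic, fed both output shapes documented in the comments (repository and digest values are the hypothetical samples from the diff):

import re

UNPRIVILEGED_PUSH_OUTPUT = (
    "Using default tag: latest\n"
    "The push refers to repository [docker-dev.yelpcorp.com/paasta-spark-run-dpopes:latest]\n"
    "latest: digest: sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557"
)
# Privileged docker appends " size: NN" to the same final line.
PRIVILEGED_PUSH_OUTPUT = UNPRIVILEGED_PUSH_OUTPUT + " size: 52"

for output in (UNPRIVILEGED_PUSH_OUTPUT, PRIVILEGED_PUSH_OUTPUT):
    digest_line = output.split("\n")[-1]
    # Matches "<tag>: digest: <digest>"; the named group is non-whitespace, so it
    # stops before the optional trailing " size: NN".
    digest_match = re.match(r"[^:]*: [^:]*: (?P<digest>[^\s]*)", digest_line)
    assert digest_match is not None
    print(digest_match.group("digest"))
    # -> sha256:0a43aa65174a400bd280d48d460b73eb49b0ded4072c9e173f919543bf693557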
118 changes: 97 additions & 21 deletions tests/cli/test_cmds_spark_run.py
@@ -16,11 +16,11 @@
 import mock
 import pytest
 from boto3.exceptions import Boto3Error
-from mock import Mock
 from service_configuration_lib import spark_config

 from paasta_tools.cli.cmds import spark_run
 from paasta_tools.cli.cmds.spark_run import _should_get_resource_requirements
+from paasta_tools.cli.cmds.spark_run import build_and_push_docker_image
 from paasta_tools.cli.cmds.spark_run import CLUSTER_MANAGER_K8S
 from paasta_tools.cli.cmds.spark_run import CLUSTER_MANAGER_MESOS
 from paasta_tools.cli.cmds.spark_run import configure_and_run_docker_container
@@ -139,23 +139,6 @@ def mock_run():
         yield m


-@pytest.fixture
-def mock_get_docker_client():
-    fake_image_info = {
-        "RepoDigests": [
-            DUMMY_DOCKER_IMAGE_DIGEST,
-        ],
-    }
-    docker_client = Mock(inspect_image=Mock(return_value=fake_image_info))
-
-    with mock.patch(
-        "paasta_tools.cli.cmds.spark_run.get_docker_client",
-        return_value=docker_client,
-        autospec=True,
-    ) as m:
-        yield m
-
-
 @pytest.mark.parametrize(
     "args,expected_output",
     [
@@ -1121,7 +1104,6 @@ def test_paasta_spark_run_bash(
     mock_load_system_paasta_config,
     mock_validate_work_dir,
     mock_generate_pod_template_path,
-    mock_get_docker_client,
 ):
     args = argparse.Namespace(
         work_dir="/tmp/local",
@@ -1154,6 +1136,7 @@
         "test-cluster": ["test-pool"]
     }
     mock_should_enable_compact_bin_packing.return_value = True
+    mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST
     spark_run.paasta_spark_run(args)
     mock_validate_work_dir.assert_called_once_with("/tmp/local")
     assert args.cmd == "/bin/bash"
@@ -1235,7 +1218,6 @@ def test_paasta_spark_run(
     mock_load_system_paasta_config,
     mock_validate_work_dir,
     mock_generate_pod_template_path,
-    mock_get_docker_client,
 ):
     args = argparse.Namespace(
         work_dir="/tmp/local",
@@ -1268,6 +1250,7 @@
         "test-cluster": ["test-pool"]
     }
     mock_should_enable_compact_bin_packing.return_value = True
+    mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST
     spark_run.paasta_spark_run(args)
     mock_validate_work_dir.assert_called_once_with("/tmp/local")
     assert args.cmd == "USER=test timeout 1m spark-submit test.py"
@@ -1348,7 +1331,6 @@ def test_paasta_spark_run_pyspark(
     mock_load_system_paasta_config,
     mock_validate_work_dir,
     mock_generate_pod_template_path,
-    mock_get_docker_client,
 ):
     args = argparse.Namespace(
         work_dir="/tmp/local",
@@ -1384,6 +1366,7 @@
         "test-cluster": ["test-pool"]
     }

+    mock_get_docker_image.return_value = DUMMY_DOCKER_IMAGE_DIGEST
     spark_run.paasta_spark_run(args)
     mock_validate_work_dir.assert_called_once_with("/tmp/local")
     assert args.cmd == "pyspark"
@@ -1482,3 +1465,96 @@ def test_decide_final_eks_toggle_state(override, default, expected):
     )

     assert decide_final_eks_toggle_state(override) is expected
+
+
+@mock.patch.object(spark_run, "makefile_responds_to", autospec=True)
+@mock.patch.object(spark_run, "paasta_cook_image", autospec=True)
+@mock.patch.object(spark_run, "get_username", autospec=True)
+def test_build_and_push_docker_image_unprivileged_output_format(
+    mock_get_username,
+    mock_paasta_cook_image,
+    mock_makefile_responds_to,
+    mock_run,
+):
+    args = mock.MagicMock(
+        docker_registry="MOCK-docker-dev.yelpcorp.com",
+        autospec=True,
+    )
+    mock_makefile_responds_to.return_value = True
+    mock_paasta_cook_image.return_value = 0
+    mock_run.side_effect = [
+        (0, None),
+        (
+            0,
+            (
+                "Using default tag: latest\n"
+                "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n"
+                "latest: digest: sha256:103ce91c65d42498ca61cdfe8d799fab8ab1c37dac58b743b49ced227bc7bc06"
+            ),
+        ),
+    ]
+    mock_get_username.return_value = "user"
+    docker_image_digest = build_and_push_docker_image(args)
+    assert DUMMY_DOCKER_IMAGE_DIGEST == docker_image_digest
+
+
+@mock.patch.object(spark_run, "makefile_responds_to", autospec=True)
+@mock.patch.object(spark_run, "paasta_cook_image", autospec=True)
+@mock.patch.object(spark_run, "get_username", autospec=True)
+def test_build_and_push_docker_image_privileged_output_format(
+    mock_get_username,
+    mock_paasta_cook_image,
+    mock_makefile_responds_to,
+    mock_run,
+):
+    args = mock.MagicMock(
+        docker_registry="MOCK-docker-dev.yelpcorp.com",
+        autospec=True,
+    )
+    mock_makefile_responds_to.return_value = True
+    mock_paasta_cook_image.return_value = 0
+    mock_run.side_effect = [
+        (0, None),
+        (
+            0,
+            (
+                "Using default tag: latest\n"
+                "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n"
+                "latest: digest: sha256:103ce91c65d42498ca61cdfe8d799fab8ab1c37dac58b743b49ced227bc7bc06 size: 1337"
+            ),
+        ),
+    ]
+    mock_get_username.return_value = "user"
+    docker_image_digest = build_and_push_docker_image(args)
+    assert DUMMY_DOCKER_IMAGE_DIGEST == docker_image_digest
+
+
+@mock.patch.object(spark_run, "makefile_responds_to", autospec=True)
+@mock.patch.object(spark_run, "paasta_cook_image", autospec=True)
+@mock.patch.object(spark_run, "get_username", autospec=True)
+def test_build_and_push_docker_image_unexpected_output_format(
+    mock_get_username,
+    mock_paasta_cook_image,
+    mock_makefile_responds_to,
+    mock_run,
+):
+    args = mock.MagicMock(
+        docker_registry="MOCK-docker-dev.yelpcorp.com",
+        autospec=True,
+    )
+    mock_makefile_responds_to.return_value = True
+    mock_paasta_cook_image.return_value = 0
+    mock_run.side_effect = [
+        (0, None),
+        (
+            0,
+            (
+                "Using default tag: latest\n"
+                "The push refers to repository [MOCK-docker-dev.yelpcorp.com/paasta-spark-run-user:latest]\n"
+                "the regex will not match this"
+            ),
+        ),
+    ]
+    with pytest.raises(ValueError) as e:
+        build_and_push_docker_image(args)
+    assert "Could not determine digest from output" in str(e.value)