From 6ae003bac4e23d5288a72514a20eee3d064da69e Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 05:49:21 -0700 Subject: [PATCH 1/8] Use subnets in one VPC --- python/ray/autoscaler/_private/aws/config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ray/autoscaler/_private/aws/config.py b/python/ray/autoscaler/_private/aws/config.py index cd9d9c1c11ce..09eaafd7bea5 100644 --- a/python/ray/autoscaler/_private/aws/config.py +++ b/python/ray/autoscaler/_private/aws/config.py @@ -415,6 +415,13 @@ def _configure_subnet(config): "to populate the list of subnets and trying this again.". format(config["provider"]["availability_zone"])) + # Generate subnets in only one VPC. This is to make sure we don't attempt + # to launch a node into a VPC outside the security group + # generated by _configure_security_group. + # See the error referenced here + # https://github.com/ray-project/ray/pull/5053#discussion_r300139092. + subnets = [s for s in subnets if s.vpc_id == subnets[0].vpc_id] + subnet_ids = [s.subnet_id for s in subnets] if "SubnetIds" not in config["head_node"]: _set_config_info(head_subnet_src="default") From ed034e551dfef9a5f930325ff6c064cee41d5077 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 06:02:10 -0700 Subject: [PATCH 2/8] Refer to pull request --- python/ray/autoscaler/_private/aws/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/ray/autoscaler/_private/aws/config.py b/python/ray/autoscaler/_private/aws/config.py index 09eaafd7bea5..c6ccda927b10 100644 --- a/python/ray/autoscaler/_private/aws/config.py +++ b/python/ray/autoscaler/_private/aws/config.py @@ -418,8 +418,7 @@ def _configure_subnet(config): # Generate subnets in only one VPC. This is to make sure we don't attempt # to launch a node into a VPC outside the security group # generated by _configure_security_group. - # See the error referenced here - # https://github.com/ray-project/ray/pull/5053#discussion_r300139092. + # See https://github.com/ray-project/ray/pull/14868. subnets = [s for s in subnets if s.vpc_id == subnets[0].vpc_id] subnet_ids = [s.subnet_id for s in subnets] From b8b261e360ac28427ff9073b9c6f465d1c82ce11 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 08:06:14 -0700 Subject: [PATCH 3/8] wip -- tests failing --- python/ray/tests/aws/test_autoscaler_aws.py | 43 +++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/python/ray/tests/aws/test_autoscaler_aws.py b/python/ray/tests/aws/test_autoscaler_aws.py index 09c736a07c06..cb76424761c9 100644 --- a/python/ray/tests/aws/test_autoscaler_aws.py +++ b/python/ray/tests/aws/test_autoscaler_aws.py @@ -11,6 +11,49 @@ DEFAULT_SG_WITH_NAME, DEFAULT_SG_WITH_NAME_AND_RULES, CUSTOM_IN_BOUND_RULES +def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): + stubs.configure_iam_role_default(iam_client_stub) + stubs.configure_key_pair_default(ec2_client_stub) + + # Add a response with a thousand subnets all in different VPCs. + # After filtering, only subnet in one particular VPC should remain. + # Thus head_node.SubnetIds and worker_nodes.SubnetIds should end up as + # being length-one lists after the bootstrap_config. + stubs.describe_a_thousand_subnets_in_different_vpcs(ec2_client_stub) + # describe the subnet in use while determining its vpc + stubs.describe_subnets_echo(ec2_client_stub, DEFAULT_SUBNET) + # given no existing security groups within the VPC... + stubs.describe_no_security_groups(ec2_client_stub) + # expect to create a security group on the VPC + stubs.create_sg_echo(ec2_client_stub, DEFAULT_SG) + # expect new security group details to be retrieved after creation + stubs.describe_sgs_on_vpc( + ec2_client_stub, + [DEFAULT_SUBNET["VpcId"]], + [DEFAULT_SG], + ) + + # given no existing default security group inbound rules... + # expect to authorize all default inbound rules + stubs.authorize_sg_ingress( + ec2_client_stub, + DEFAULT_SG_WITH_NAME_AND_RULES, + ) + + # given our mocks and an example config file as input... + # expect the config to be loaded, validated, and bootstrapped successfully + config = helpers.bootstrap_aws_example_config_file("example-full.yaml") + + # We've filtered down to only one subnet id. + assert config["head_node"]["SubnetIds"] == [DEFAULT_SUBNET["SubnetId"]] + assert config["worker_nodes"]["SubnetIds"] == [DEFAULT_SUBNET["SubnetId"]] + # Check that the security group has been filled correctly. + assert config["head_node"]["SecurityGroupIds"] == [DEFAULT_SG["GroupId"]] + assert config["worker_nodes"]["SecurityGroupIds"] == [ + DEFAULT_SG["GroupId"] + ] + + def test_create_sg_different_vpc_same_rules(iam_client_stub, ec2_client_stub): # use default stubs to skip ahead to security group configuration stubs.skip_to_configure_sg(ec2_client_stub, iam_client_stub) From 983172c2b98ec027e36f7f879f9717f80f88c1b1 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 09:49:36 -0700 Subject: [PATCH 4/8] Finish unit test, ready for review --- python/ray/tests/aws/test_autoscaler_aws.py | 23 ++++++++++++++++++--- python/ray/tests/aws/utils/stubs.py | 12 +++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/python/ray/tests/aws/test_autoscaler_aws.py b/python/ray/tests/aws/test_autoscaler_aws.py index cb76424761c9..011cdee6689d 100644 --- a/python/ray/tests/aws/test_autoscaler_aws.py +++ b/python/ray/tests/aws/test_autoscaler_aws.py @@ -8,10 +8,16 @@ from ray.tests.aws.utils.constants import AUX_SUBNET, DEFAULT_SUBNET, \ DEFAULT_SG_AUX_SUBNET, DEFAULT_SG, DEFAULT_SG_DUAL_GROUP_RULES, \ DEFAULT_SG_WITH_RULES_AUX_SUBNET, AUX_SG, \ - DEFAULT_SG_WITH_NAME, DEFAULT_SG_WITH_NAME_AND_RULES, CUSTOM_IN_BOUND_RULES + DEFAULT_SG_WITH_RULES, DEFAULT_SG_WITH_NAME, \ + DEFAULT_SG_WITH_NAME_AND_RULES, CUSTOM_IN_BOUND_RULES def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): + """ + This test validates that when bootstrap_aws populates the SubnetIds field, + all of the subnets used belong to the same VPC, and that a SecurityGroup + in that VPC is correctly configured. + """ stubs.configure_iam_role_default(iam_client_stub) stubs.configure_key_pair_default(ec2_client_stub) @@ -20,6 +26,7 @@ def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): # Thus head_node.SubnetIds and worker_nodes.SubnetIds should end up as # being length-one lists after the bootstrap_config. stubs.describe_a_thousand_subnets_in_different_vpcs(ec2_client_stub) + # describe the subnet in use while determining its vpc stubs.describe_subnets_echo(ec2_client_stub, DEFAULT_SUBNET) # given no existing security groups within the VPC... @@ -37,14 +44,22 @@ def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): # expect to authorize all default inbound rules stubs.authorize_sg_ingress( ec2_client_stub, - DEFAULT_SG_WITH_NAME_AND_RULES, + DEFAULT_SG_WITH_RULES, + ) + + # expect another call to describe the above security group while checking + # a second time if it has ip_permissions set ("if not sg.ip_permissions") + stubs.describe_an_sg_2( + ec2_client_stub, + DEFAULT_SG_WITH_RULES, ) # given our mocks and an example config file as input... # expect the config to be loaded, validated, and bootstrapped successfully config = helpers.bootstrap_aws_example_config_file("example-full.yaml") - # We've filtered down to only one subnet id. + # We've filtered down to only one subnet id -- only one of the thousand + # subnets generated by ec2.subnets.all() belongs to the right VPC. assert config["head_node"]["SubnetIds"] == [DEFAULT_SUBNET["SubnetId"]] assert config["worker_nodes"]["SubnetIds"] == [DEFAULT_SUBNET["SubnetId"]] # Check that the security group has been filled correctly. @@ -55,6 +70,8 @@ def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): def test_create_sg_different_vpc_same_rules(iam_client_stub, ec2_client_stub): + _get_vpc_id_or_die.cache_clear() + # use default stubs to skip ahead to security group configuration stubs.skip_to_configure_sg(ec2_client_stub, iam_client_stub) diff --git a/python/ray/tests/aws/utils/stubs.py b/python/ray/tests/aws/utils/stubs.py index 61f1f9ab632b..175943298c06 100644 --- a/python/ray/tests/aws/utils/stubs.py +++ b/python/ray/tests/aws/utils/stubs.py @@ -85,6 +85,18 @@ def describe_a_security_group(ec2_client_stub, security_group): service_response={"SecurityGroups": [security_group]}) +def describe_an_sg_2(ec2_client_stub, security_group): + """Same as last method, different input param format. + + A call with this input parameter format is made when sg.ip_permissions is + accessed in aws/config.py. + """ + ec2_client_stub.add_response( + "describe_security_groups", + expected_params={"GroupIds": [security_group["GroupId"]]}, + service_response={"SecurityGroups": [security_group]}) + + def create_sg_echo(ec2_client_stub, security_group): ec2_client_stub.add_response( "create_security_group", From 3e74705889f7cc326b2cc8383aeb8d31838bd458 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 09:57:24 -0700 Subject: [PATCH 5/8] Allen comment -- Combine lines --- python/ray/autoscaler/_private/aws/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ray/autoscaler/_private/aws/config.py b/python/ray/autoscaler/_private/aws/config.py index c6ccda927b10..35488b9ec40d 100644 --- a/python/ray/autoscaler/_private/aws/config.py +++ b/python/ray/autoscaler/_private/aws/config.py @@ -415,13 +415,13 @@ def _configure_subnet(config): "to populate the list of subnets and trying this again.". format(config["provider"]["availability_zone"])) - # Generate subnets in only one VPC. This is to make sure we don't attempt + # Use subnets in only one VPC. This is to make sure we don't attempt # to launch a node into a VPC outside the security group # generated by _configure_security_group. # See https://github.com/ray-project/ray/pull/14868. - subnets = [s for s in subnets if s.vpc_id == subnets[0].vpc_id] - - subnet_ids = [s.subnet_id for s in subnets] + subnet_ids = [ + s.subnet_id for s in subnets if s.vpc_id == subnets[0].vpc_id + ] if "SubnetIds" not in config["head_node"]: _set_config_info(head_subnet_src="default") config["head_node"]["SubnetIds"] = subnet_ids From 9a81c54b01a32b7290fcea0242315617c80655b9 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 10:01:17 -0700 Subject: [PATCH 6/8] Move cache clear --- python/ray/tests/aws/test_autoscaler_aws.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/ray/tests/aws/test_autoscaler_aws.py b/python/ray/tests/aws/test_autoscaler_aws.py index 011cdee6689d..78fd71cd31ee 100644 --- a/python/ray/tests/aws/test_autoscaler_aws.py +++ b/python/ray/tests/aws/test_autoscaler_aws.py @@ -57,6 +57,7 @@ def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): # given our mocks and an example config file as input... # expect the config to be loaded, validated, and bootstrapped successfully config = helpers.bootstrap_aws_example_config_file("example-full.yaml") + _get_vpc_id_or_die.cache_clear() # We've filtered down to only one subnet id -- only one of the thousand # subnets generated by ec2.subnets.all() belongs to the right VPC. @@ -70,8 +71,6 @@ def test_use_subnets_in_only_one_vpc(iam_client_stub, ec2_client_stub): def test_create_sg_different_vpc_same_rules(iam_client_stub, ec2_client_stub): - _get_vpc_id_or_die.cache_clear() - # use default stubs to skip ahead to security group configuration stubs.skip_to_configure_sg(ec2_client_stub, iam_client_stub) From 8fd3106ff207898bb0845f9e337e7b28738d84fd Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 10:02:55 -0700 Subject: [PATCH 7/8] tweak --- python/ray/tests/aws/utils/stubs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ray/tests/aws/utils/stubs.py b/python/ray/tests/aws/utils/stubs.py index 175943298c06..1c48ed47667b 100644 --- a/python/ray/tests/aws/utils/stubs.py +++ b/python/ray/tests/aws/utils/stubs.py @@ -86,7 +86,7 @@ def describe_a_security_group(ec2_client_stub, security_group): def describe_an_sg_2(ec2_client_stub, security_group): - """Same as last method, different input param format. + """Same as last function, different input param format. A call with this input parameter format is made when sg.ip_permissions is accessed in aws/config.py. From 87f386d5efdf594a9f0966faab2733fc914380bc Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman Date: Tue, 23 Mar 2021 12:30:59 -0700 Subject: [PATCH 8/8] Charles -- clarify comment --- python/ray/autoscaler/_private/aws/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ray/autoscaler/_private/aws/config.py b/python/ray/autoscaler/_private/aws/config.py index 35488b9ec40d..83b25d11605b 100644 --- a/python/ray/autoscaler/_private/aws/config.py +++ b/python/ray/autoscaler/_private/aws/config.py @@ -415,9 +415,10 @@ def _configure_subnet(config): "to populate the list of subnets and trying this again.". format(config["provider"]["availability_zone"])) - # Use subnets in only one VPC. This is to make sure we don't attempt - # to launch a node into a VPC outside the security group - # generated by _configure_security_group. + # Use subnets in only one VPC, so that _configure_security_groups only + # needs to create a security group in this one VPC. Otherwise, we'd need + # to set up security groups in all of the user's VPCs and set up networking + # rules to allow traffic between these groups. # See https://github.com/ray-project/ray/pull/14868. subnet_ids = [ s.subnet_id for s in subnets if s.vpc_id == subnets[0].vpc_id