Skip to content

Commit

Permalink
cartography-cncf#1210: EBSVolume => new data model, Allow node attr u…
Browse files Browse the repository at this point in the history
…pdates from multiple intel modules (cartography-cncf#1214)

See cartography-cncf#1210 for full context.

cartography-cncf#1154 tried to solve this problem by updating the querybuilder but this
was too complex and would not generalize well.

This solution is simpler where we use different property classes for
each API response so that we don't overwrite properties on a node set by
another sync job.

This PR can be reviewed commit-by-commit:
- c0d9ac4 shows a repro of the error
with a failing integration test.
- facb63b shows the solution using
multiple classes.

---------

Co-authored-by: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com>
  • Loading branch information
2 people authored and chandan-cl committed Jun 26, 2024
1 parent 37867ce commit d75b1de
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 147 deletions.
4 changes: 2 additions & 2 deletions cartography/intel/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from cartography.models.aws.ec2.reservations import EC2ReservationSchema
from cartography.models.aws.ec2.securitygroups import EC2SecurityGroupSchema
from cartography.models.aws.ec2.subnets import EC2SubnetSchema
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.models.aws.ec2.volumes import EBSVolumeInstanceSchema
from cartography.util import aws_handle_regions
from cartography.util import timeit

Expand Down Expand Up @@ -273,7 +273,7 @@ def load_ec2_instance_ebs_volumes(
) -> None:
load(
neo4j_session,
EBSVolumeSchema(),
EBSVolumeInstanceSchema(),
ebs_data,
Region=region,
AWS_ID=current_aws_account_id,
Expand Down
118 changes: 53 additions & 65 deletions cartography/intel/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import boto3
import neo4j

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.util import aws_handle_regions
from cartography.util import timeit
Expand All @@ -16,7 +18,7 @@

@timeit
@aws_handle_regions
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict[str, Any]]:
client = boto3_session.client('ec2', region_name=region)
paginator = client.get_paginator('describe_volumes')
volumes: List[Dict] = []
Expand All @@ -26,90 +28,76 @@ def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]


def transform_volumes(volumes: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> List[Dict[str, Any]]:
result = []
for volume in volumes:
volume['VolumeArn'] = f"arn:aws:ec2:{region}:{current_aws_account_id}:volume/{volume['VolumeId']}"
volume['CreateTime'] = str(volume['CreateTime'])
return volumes
attachments = volume.get('Attachments', [])
active_attachments = [a for a in attachments if a['State'] == 'attached']

volume_id = volume['VolumeId']
raw_vol = ({
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'AvailabilityZone': volume['AvailabilityZone'],
'CreateTime': volume['CreateTime'],
'Encrypted': volume['Encrypted'],
'Size': volume['Size'],
'State': volume['State'],
'OutpostArn': volume['OutpostArn'],
'SnapshotId': volume['SnapshotId'],
'Iops': volume['Iops'],
'FastRestored': volume['FastRestored'],
'MultiAttachEnabled': volume['MultiAttachEnabled'],
'VolumeType': volume['VolumeType'],
'VolumeId': volume_id,
'KmsKeyId': volume['KmsKeyId'],
})

if not active_attachments:
result.append(raw_vol)
continue

for attachment in active_attachments:
vol_with_attachment = raw_vol.copy()
vol_with_attachment['InstanceId'] = attachment['InstanceId']
result.append(vol_with_attachment)

return result


@timeit
def load_volumes(
neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
neo4j_session: neo4j.Session,
ebs_data: List[Dict[str, Any]],
region: str,
current_aws_account_id: str,
update_tag: int,
) -> None:
ingest_volumes = """
UNWIND $volumes_list as volume
MERGE (vol:EBSVolume{id: volume.VolumeId})
ON CREATE SET vol.firstseen = timestamp()
SET vol.arn = volume.VolumeArn,
vol.lastupdated = $update_tag,
vol.availabilityzone = volume.AvailabilityZone,
vol.createtime = volume.CreateTime,
vol.encrypted = volume.Encrypted,
vol.size = volume.Size,
vol.state = volume.State,
vol.outpostarn = volume.OutpostArn,
vol.snapshotid = volume.SnapshotId,
vol.iops = volume.Iops,
vol.fastrestored = volume.FastRestored,
vol.multiattachenabled = volume.MultiAttachEnabled,
vol.type = volume.VolumeType,
vol.kmskeyid = volume.KmsKeyId,
vol.region=$Region
WITH vol
MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (aa)-[r:RESOURCE]->(vol)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $update_tag
"""

neo4j_session.run(
ingest_volumes,
volumes_list=data,
AWS_ACCOUNT_ID=current_aws_account_id,
load(
neo4j_session,
EBSVolumeSchema(),
ebs_data,
Region=region,
update_tag=update_tag,
AWS_ID=current_aws_account_id,
lastupdated=update_tag,
)


def load_volume_relationships(
neo4j_session: neo4j.Session,
volumes: List[Dict[str, Any]],
aws_update_tag: int,
) -> None:
add_relationship_query = """
MATCH (volume:EBSVolume{arn: $VolumeArn})
WITH volume
MATCH (instance:EC2Instance{instanceid: $InstanceId})
MERGE (volume)-[r:ATTACHED_TO_EC2_INSTANCE]->(instance)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
"""
for volume in volumes:
for attachment in volume.get('Attachments', []):
if attachment['State'] != 'attached':
continue
neo4j_session.run(
add_relationship_query,
VolumeArn=volume['VolumeArn'],
InstanceId=attachment['InstanceId'],
aws_update_tag=aws_update_tag,
)


@timeit
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session)


@timeit
def sync_ebs_volumes(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.debug("Syncing volumes for region '%s' in account '%s'.", region, current_aws_account_id)
data = get_volumes(boto3_session, region)
transformed_data = transform_volumes(data, region, current_aws_account_id)
load_volumes(neo4j_session, transformed_data, region, current_aws_account_id, update_tag)
load_volume_relationships(neo4j_session, transformed_data, update_tag)
cleanup_volumes(neo4j_session, common_job_parameters)
18 changes: 18 additions & 0 deletions cartography/intel/aws/util/arns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Optional


def build_arn(
resource: str,
account: str,
typename: str,
name: str,
region: Optional[str] = None,
partition: Optional[str] = None,
) -> str:
if not partition:
# TODO: support partitions from others. Please file an issue on this if needed, would love to hear from you
partition = 'aws'
if not region:
# Some resources are present in all regions, e.g. IAM policies
region = ""
return f"arn:{partition}:{resource}:{region}:{account}:{typename}/{name}"
46 changes: 45 additions & 1 deletion cartography/models/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,23 @@

@dataclass(frozen=True)
class EBSVolumeNodeProperties(CartographyNodeProperties):
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('VolumeId')
volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination')
availabilityzone: PropertyRef = PropertyRef('AvailabilityZone')
createtime: PropertyRef = PropertyRef('CreateTime')
encrypted: PropertyRef = PropertyRef('Encrypted')
size: PropertyRef = PropertyRef('Size')
state: PropertyRef = PropertyRef('State')
outpostarn: PropertyRef = PropertyRef('OutpostArn')
snapshotid: PropertyRef = PropertyRef('SnapshotId')
iops: PropertyRef = PropertyRef('Iops')
fastrestored: PropertyRef = PropertyRef('FastRestored')
multiattachenabled: PropertyRef = PropertyRef('MultiAttachEnabled')
type: PropertyRef = PropertyRef('VolumeType')
kmskeyid: PropertyRef = PropertyRef('KmsKeyId')


@dataclass(frozen=True)
Expand Down Expand Up @@ -53,6 +66,9 @@ class EBSVolumeToEC2Instance(CartographyRelSchema):

@dataclass(frozen=True)
class EBSVolumeSchema(CartographyNodeSchema):
"""
EBS Volume properties as returned from the EBS Volume API response
"""
label: str = 'EBSVolume'
properties: EBSVolumeNodeProperties = EBSVolumeNodeProperties()
sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount()
Expand All @@ -61,3 +77,31 @@ class EBSVolumeSchema(CartographyNodeSchema):
EBSVolumeToEC2Instance(),
],
)


@dataclass(frozen=True)
class EBSVolumeInstanceProperties(CartographyNodeProperties):
"""
EBS Volume properties as known by an EC2 instance.
The EC2 instance API response includes a `deleteontermination` field and the volume id.
"""
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('VolumeId')
volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination')


@dataclass(frozen=True)
class EBSVolumeInstanceSchema(CartographyNodeSchema):
"""
EBS Volume from EC2 Instance API response. This is separate from `EBSVolumeSchema` to prevent issue #1210.
"""
label: str = 'EBSVolume'
properties: EBSVolumeInstanceProperties = EBSVolumeInstanceProperties()
sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount()
other_relationships: OtherRelationships = OtherRelationships(
[
EBSVolumeToEC2Instance(),
],
)
4 changes: 2 additions & 2 deletions tests/data/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
'Size': 123,
'SnapshotId': 'sn-01',
'State': 'available',
'VolumeId': 'v-01',
'VolumeId': 'vol-0df',
'Iops': 123,
'VolumeType': 'standard',
'FastRestored': True,
Expand All @@ -33,7 +33,7 @@
'OutpostArn': 'arn1',
'Size': 123,
'State': 'available',
'VolumeId': 'v-02',
'VolumeId': 'vol-03',
'Iops': 123,
'SnapshotId': 'sn-02',
'VolumeType': 'standard',
Expand Down
Loading

0 comments on commit d75b1de

Please sign in to comment.