Skip to content

Commit

Permalink
Rewrite resource owner tracking to avoid quadratic blowup
Browse files Browse the repository at this point in the history
Instead of merging dictionaries of resources at every intermediate rule, we
keep a nested set of (path, owner) pairs as well as a nested set of unowned
resources, and reconstruct the implicit map at the binary level only when we
actually need it to filter the resources, and drop it immediately afterwards.

For a benchmark run, this reduces post-analysis memory from ~5.9 GB to ~5 GB.

RELNOTES: Refactored AppleResourceInfo provider to reduce memory consumption.
PiperOrigin-RevId: 259545637
  • Loading branch information
Googler authored and swiple-rules-gardener committed Jul 23, 2019
1 parent 9462b5e commit 6f5fa24
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 59 deletions.
30 changes: 25 additions & 5 deletions apple/internal/partials/resources.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,19 @@ def _merge_root_infoplists(ctx, infoplists, out_infoplist, **kwargs):

return [(processor.location.content, None, depset(direct = files))]

def _deduplicate(resources_provider, avoid_provider, field):
def _expand_owners(owners):
"""Converts a depset of (path, owner) to a dict of paths to dict of owners.
Args:
owners: A depset of (path, owner) pairs.
"""
dict = {}
for resource, owner in owners.to_list():
if owner:
dict.setdefault(resource, default = {})[owner] = None
return dict

def _deduplicate(resources_provider, avoid_provider, owners, avoid_owners, field):
"""Deduplicates and returns resources between 2 providers for a given field.
Deduplication happens by comparing the target path of a file and the files
Expand All @@ -112,14 +124,15 @@ def _deduplicate(resources_provider, avoid_provider, field):
Args:
resources_provider: The provider with the resources to be bundled.
avoid_provider: The provider with the resources to avoid bundling.
owners: The owners map for resources_provider computed by _expand_owners.
avoid_owners: The owners map for avoid_provider computed by _expand_owners.
field: The field to deduplicate resources on.
Returns:
A list of tuples with the resources present in avoid_providers removed from
resources_providers.
"""

# Build a dictionary with the file paths under each key for the avoided resources.
avoid_dict = {}
if avoid_provider and hasattr(avoid_provider, field):
for parent_dir, swift_module, files in getattr(avoid_provider, field):
Expand Down Expand Up @@ -151,8 +164,8 @@ def _deduplicate(resources_provider, avoid_provider, field):
# add the resource to be bundled in the bundle represented by resource_provider.
deduped_owners = [
o
for o in resources_provider.owners[short_path].to_list()
if o not in avoid_provider.owners[short_path].to_list()
for o in owners[short_path]
if o not in avoid_owners[short_path]
]
if deduped_owners:
deduped_files.append(to_bundle_file)
Expand Down Expand Up @@ -299,9 +312,16 @@ def _resources_partial_impl(
locales_included = sets.make(["Base"])
locales_dropped = sets.make()

# Precompute owners and avoid_owners to avoid duplicate work in _deduplicate.
# Build a dictionary with the file paths under each key for the avoided resources.
avoid_owners = {}
if avoid_provider:
avoid_owners = _expand_owners(avoid_provider.owners)
owners = _expand_owners(final_provider.owners)

for field in fields:
processing_func, requires_swift_module = provider_field_to_action[field]
deduplicated = _deduplicate(final_provider, avoid_provider, field)
deduplicated = _deduplicate(final_provider, avoid_provider, owners, avoid_owners, field)
for parent_dir, swift_module, files in deduplicated:
if locales_requested:
locale = _locale_for_path(parent_dir)
Expand Down
5 changes: 4 additions & 1 deletion apple/internal/resource_rules/apple_resource_bundle.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def _apple_resource_bundle_impl(ctx):
else:
# If there were no resources to bundle, propagate an empty provider to signal that this
# target has already been processed anyways.
complete_resource_provider = AppleResourceInfo(owners = {})
complete_resource_provider = AppleResourceInfo(
owners = depset(),
unowned_resources = depset(),
)

return [
# TODO(b/122578556): Remove this ObjC provider instance.
Expand Down
101 changes: 51 additions & 50 deletions apple/internal/resources.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -134,23 +134,23 @@ def _bucketize(
A AppleResourceInfo provider with resources bucketized according to type.
"""
buckets = {}
owners = {}
owner_depset = None
owners = []
unowned_resources = []

# Transform the list of buckets to avoid into a set for faster lookup.
allowed_bucket_set = {}
if allowed_buckets:
allowed_bucket_set = {k: None for k in allowed_buckets}

if owner:
# By using one depset reference, we can save memory for the cases where multiple resources
# only have one owner.
owner_depset = depset(direct = [owner])
for resource in resources:
# Local cache of the resource short path since it gets used quite a bit below.
resource_short_path = resource.short_path

owners[resource_short_path] = owner_depset
if owner:
owners.append((resource_short_path, owner))
else:
unowned_resources.append(resource_short_path)

if types.is_string(parent_dir_param) or parent_dir_param == None:
parent = parent_dir_param
else:
Expand Down Expand Up @@ -204,7 +204,8 @@ def _bucketize(
).append((parent, resource_swift_module, resource_depset))

return AppleResourceInfo(
owners = owners,
owners = depset(owners),
unowned_resources = depset(unowned_resources),
**dict([(k, _minimize(b)) for k, b in buckets.items()])
)

Expand All @@ -228,15 +229,16 @@ def _bucketize_typed(resources, bucket_type, owner = None, parent_dir_param = No
A AppleResourceInfo provider with resources in the given bucket.
"""
typed_bucket = []
owners = {}
owner_depset = None
if owner:
# By using one depset reference, we can save memory for the cases where multiple resources
# only have one owner.
owner_depset = depset(direct = [owner])
owners = []
unowned_resources = []

for resource in resources:
resource_short_path = resource.short_path
owners[resource_short_path] = owner_depset
if owner:
owners.append((resource_short_path, owner))
else:
unowned_resources.append(resource_short_path)

if types.is_string(parent_dir_param) or parent_dir_param == None:
parent = parent_dir_param
else:
Expand All @@ -247,7 +249,12 @@ def _bucketize_typed(resources, bucket_type, owner = None, parent_dir_param = No
parent = paths.join(parent or "", paths.basename(lproj_path))

typed_bucket.append((parent, None, depset(direct = [resource])))
return AppleResourceInfo(owners = owners, **{bucket_type: _minimize(typed_bucket)})

return AppleResourceInfo(
owners = depset(owners),
unowned_resources = depset(unowned_resources),
**{bucket_type: _minimize(typed_bucket)}
)

def _bundle_relative_parent_dir(resource, extension):
"""Returns the bundle relative path to the resource rooted at the bundle.
Expand Down Expand Up @@ -331,13 +338,6 @@ def _merge_providers(providers, default_owner = None, validate_all_resources_own

buckets = {}

# owners is a map of resource paths to a list of depsets of transitive owners.
owners_lists = {}
default_owner_depset = None
if default_owner:
# By using one depset reference, we can save memory for the cases where multiple resources
# only have one owner.
default_owner_depset = depset(direct = [default_owner])
for provider in providers:
# Get the initialized fields in the provider, with the exception of to_json and to_proto,
# which are not desireable for our use case.
Expand All @@ -347,34 +347,30 @@ def _merge_providers(providers, default_owner = None, validate_all_resources_own
field,
default = [],
).extend(getattr(provider, field))
for resource_path, resource_owners in provider.owners.items():
owners_list = owners_lists.setdefault(resource_path, [])

# If there is no owner marked for this resource, use the default_owner as an owner, if
# it exists.
if resource_owners:
owners_list.append(resource_owners)
elif default_owner:
owners_list.append(default_owner_depset)
elif validate_all_resources_owned:
fail(
"The given providers have a resource that doesn't have an owner, and " +
"validate_all_resources_owned was set. This is most likely a bug in " +
"rules_apple, please file a bug with reproduction steps.",
)

# owners is a map of resource paths to a depset of owner identifiers.
owners = {}
for resource_path, owner_list in owners_lists.items():
if len(owner_list) == 1:
# If there is only one transitive depset, avoid creating a new depset, just
# propagate it.
owners[resource_path] = owner_list[0]
else:
owners[resource_path] = depset(transitive = owner_list)

# unowned_resources is a depset of resource paths.
unowned_resources = depset(transitive = [provider.unowned_resources for provider in providers])

# owners is a depset of (resource_path, owner) pairs.
transitive_owners = [provider.owners for provider in providers]

# If owner is set, this rule now owns all previously unowned resources.
if default_owner:
transitive_owners.append(
depset([(resource, default_owner) for resource in unowned_resources.to_list()]),
)
unowned_resources = depset()
elif validate_all_resources_owned:
if unowned_resources.to_list():
fail(
"The given providers have a resource that doesn't have an owner, and " +
"validate_all_resources_owned was set. This is most likely a bug in " +
"rules_apple, please file a bug with reproduction steps.",
)

return AppleResourceInfo(
owners = owners,
owners = depset(transitive = transitive_owners),
unowned_resources = unowned_resources,
**dict([(k, _minimize(v)) for (k, v) in buckets.items()])
)

Expand Down Expand Up @@ -446,14 +442,19 @@ def _nest_in_bundle(provider_to_nest, nesting_bundle_dir):

return AppleResourceInfo(
owners = provider_to_nest.owners,
unowned_resources = provider_to_nest.unowned_resources,
**nested_provider_fields
)

def _populated_resource_fields(provider):
"""Returns a list of field names of the provider's resource buckets that are non empty."""

# TODO(b/36412967): Remove the to_json and to_proto elements of this list.
return [f for f in dir(provider) if f not in ["owners", "to_json", "to_proto"]]
return [
f
for f in dir(provider)
if f not in ["owners", "unowned_resources", "to_json", "to_proto"]
]

def _structured_resources_parent_dir(resource, parent_dir = None):
"""Returns the package relative path for the parent directory of a resource.
Expand Down
4 changes: 2 additions & 2 deletions apple/providers.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ only be bucketed with the `bucketize_typed` method.""",
"texture_atlases": "Texture atlas files.",
"unprocessed": "Generic resources not mapped to the other types.",
"xibs": "XIB Interface files.",
"owners": """Map of resource short paths to a depset of strings that represent targets that
declare ownership of that resource.""",
"owners": """Depset of (resource, owner) pairs.""",
"unowned_resources": """Depset of unowned resources.""",
},
)

Expand Down
6 changes: 5 additions & 1 deletion test/ios_application_resources_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,11 @@ def _impl(ctx):
outputs = depset([output])
return [
DefaultInfo(files = outputs),
AppleResourceInfo(unprocessed = [(None, None, outputs)], owners = {}),
AppleResourceInfo(
unprocessed = [(None, None, outputs)],
owners = depset(),
unowned_resources = depset(),
),
]
custom_rule = rule(_impl)
Expand Down

0 comments on commit 6f5fa24

Please sign in to comment.