Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix unexpected behaviour of generate_subcatalogs #241

Merged
merged 6 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ def fn():

return self

def generate_subcatalogs(self, template, defaults=None, **kwargs):
def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwargs):
"""Walks through the catalog and generates subcatalogs
for items based on the template string. See :class:`~pystac.layout.LayoutTemplate`
for details on the construction of template strings. This template string
Expand All @@ -533,29 +533,43 @@ def generate_subcatalogs(self, template, defaults=None, **kwargs):
defaults (dict): Default values for the template variables
that will be used if the property cannot be found on
the item.
parent_ids (List[str]): Optional list of the parent catalogs'
identifiers. If the bottom-most subcatalags already match the
template, no subcatalog is added.

Returns:
[catalog]: List of new catalogs created
"""
result = []
parent_ids = parent_ids or list()
parent_ids.append(self.id)
for child in self.get_children():
result.extend(child.generate_subcatalogs(template, defaults=defaults))
result.extend(
child.generate_subcatalogs(template,
defaults=defaults,
parent_ids=parent_ids.copy()))

layout_template = LayoutTemplate(template, defaults=defaults)
subcat_id_to_cat = {}

items = list(self.get_items())
for item in items:
item_parts = layout_template.get_template_values(item)
id_iter = reversed(parent_ids)
if all(['{}'.format(id) == next(id_iter, None)
for id in reversed(item_parts.values())]):
# Skip items for which the sub-catalog structure already
# matches the template. The list of parent IDs can include more
# elements on the root side, so compare the reversed sequences.
continue
curr_parent = self
for k, v in item_parts.items():
subcat_id = '{}'.format(v)
subcat = subcat_id_to_cat.get(subcat_id)
subcat = curr_parent.get_child(subcat_id)
if subcat is None:
subcat_desc = 'Catalog of items from {} with {} of {}'.format(
curr_parent.id, k, v)
subcat = pystac.Catalog(id=subcat_id, description=subcat_desc)
curr_parent.add_child(subcat)
subcat_id_to_cat[subcat_id] = subcat
result.append(subcat)
curr_parent = subcat
self.remove_item(item.id)
Expand Down
85 changes: 85 additions & 0 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,91 @@ def test_generate_subcatalogs_does_not_change_item_count(self):
expected = item_counts[child.id]
self.assertEqual(actual, expected, msg=" for child '{}'".format(child.id))

def test_generate_subcatalogs_can_be_applied_multiple_times(self):
catalog = TestCases.test_case_8()

_ = catalog.generate_subcatalogs('${year}/${month}')
catalog.normalize_hrefs('/tmp')
expected_hrefs = {item.id: item.get_self_href() for item in catalog.get_all_items()}

result = catalog.generate_subcatalogs('${year}/${month}')
self.assertEqual(len(result), 0)
catalog.normalize_hrefs('/tmp')
for item in catalog.get_all_items():
self.assertEqual(item.get_self_href(),
expected_hrefs[item.id],
msg=" for item '{}'".format(item.id))

def test_generate_subcatalogs_works_after_adding_more_items(self):
catalog = Catalog(id='test', description='Test')
properties = dict(property1='A', property2=1)
catalog.add_item(
Item(id='item1',
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
catalog.generate_subcatalogs('${property1}/${property2}')
catalog.add_item(
Item(id='item2',
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
catalog.generate_subcatalogs('${property1}/${property2}')

catalog.normalize_hrefs('/tmp')
item1_parent = catalog.get_item('item1', recursive=True).get_parent()
item2_parent = catalog.get_item('item2', recursive=True).get_parent()
self.assertEqual(item1_parent.get_self_href(), item2_parent.get_self_href())

def test_generate_subcatalogs_works_for_branched_subcatalogs(self):
catalog = Catalog(id='test', description='Test')
item_properties = [
dict(property1='A', property2=1, property3='i'), # add 3 subcats
dict(property1='A', property2=1, property3='j'), # add 1 more
dict(property1='A', property2=2, property3='i'), # add 2 more
dict(property1='B', property2=1, property3='i'), # add 3 more
]
for ni, properties in enumerate(item_properties):
catalog.add_item(
Item(id='item{}'.format(ni),
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
result = catalog.generate_subcatalogs('${property1}/${property2}/${property3}')
self.assertEqual(len(result), 9)

actual_subcats = set([cat.id for cat in result])
expected_subcats = {'A', 'B', '1', '2', 'i', 'j'}
self.assertSetEqual(actual_subcats, expected_subcats)

def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self):
catalog = Catalog(id='test', description='Test')
item_properties = [
dict(property1=1, property2=1), # add 2 subcats
dict(property1=1, property2=2), # add 1 more
dict(property1=2, property2=1), # add 2 more
dict(property1=2, property2=2), # add 1 more
]
for ni, properties in enumerate(item_properties):
catalog.add_item(
Item(id='item{}'.format(ni),
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
result = catalog.generate_subcatalogs('${property1}/${property2}')
self.assertEqual(len(result), 6)

catalog.normalize_hrefs('/')
for item in catalog.get_all_items():
parent_href = item.get_parent().get_self_href()
path_to_parent, _ = os.path.split(parent_href)
subcats = [el for el in path_to_parent.split('/') if el]
self.assertEqual(len(subcats), 2, msg=" for item '{}'".format(item.id))

def test_map_items(self):
def item_mapper(item):
item.properties['ITEM_MAPPER'] = 'YEP'
Expand Down