Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/textequiv-index'
Browse files: browse the repository at this point in the history
  • Loading branch information
kba committed Sep 13, 2022
2 parents be33adc + 8d9a566 commit be1cfd1
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
4 changes: 3 additions & 1 deletion ocrd_page_to_alto/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
@click.option('--dummy-textline/--no-dummy-textline', default=True, help='Whether to create a TextLine for regions that have TextEquiv/Unicode but no TextLine')
@click.option('--dummy-word/--no-dummy-word', default=True, help='Whether to create a Word for TextLine that have TextEquiv/Unicode but no Word')
@click.option('--textequiv-index', default=0, help='If multiple textequiv, use the n-th TextEquiv by @index')
@click.option('--textequiv-fallback-strategy', default='last', type=click.Choice(['raise', 'first', 'last']), help="What to do if nth textequiv isn't available. 'raise' will lead to a runtime error, 'first' will use the first TextEquiv, 'last' will use the last TextEquiv on the element")
@click.option('--textequiv-fallback-strategy', default='first', type=click.Choice(['raise', 'first', 'last']),
help="What to do if selected TextEquiv @index is not available: 'raise' will lead to a runtime error, "
"'first' will use the first TextEquiv, 'last' will use the last TextEquiv on the element")
@click.option('--region-order', default='document', help="Order in which to iterate over the regions", type=click.Choice(['document', 'reading-order', 'reading-order-only']))
@click.option('--textline-order', default='document', help="Order in which to iterate over the textlines", type=click.Choice(['document', 'index', 'textline-order']))
@click.option('-O', '--output-file', default='-', help='Output filename (or "-" for standard output, the default)',
Expand Down
18 changes: 9 additions & 9 deletions ocrd_page_to_alto/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ def get_nth_textequiv(reg_page, textequiv_index, textequiv_fallback_strategy):
if textequiv_fallback_strategy == 'raise':
raise ValueError("PAGE element '%s' has no TextEquivs and fallback strategy is to raise" % reg_page.id)
return ''
if len(textequivs) < textequiv_index + 1:
if textequiv_fallback_strategy == 'raise':
raise ValueError("PAGE element '%s' has only %d TextEquiv elements so cannot choose the %s%s and fallback strategy is to raise" % (
reg_page.id, len(textequivs), textequiv_index + 1, 'st' if textequiv_index == 0 else 'nd'))
elif textequiv_fallback_strategy == 'first':
return textequivs[0].Unicode
else:
return textequivs[-1].Unicode
for textequiv in textequivs:
if textequiv.get_index() == textequiv_index:
return textequiv.Unicode
if textequiv_fallback_strategy == 'raise':
raise ValueError("PAGE element '%s' has no TextEquiv index %d" % (
reg_page.id, textequiv_index))
elif textequiv_fallback_strategy == 'first':
return textequivs[0].Unicode
else:
return textequivs[textequiv_index].Unicode
return textequivs[-1].Unicode

def contains(el, bbox):
minx1, miny1, maxx1, maxy1 = bbox
Expand Down

0 comments on commit be1cfd1

Please sign in to comment.