Skip to content

Commit

Permalink
🚧 dinglehopper: Add word differences in line-dirs report
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikegerber committed Dec 14, 2021
1 parent dbb6606 commit cb2be96
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions qurator/dinglehopper/cli_line_dirs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,6 +43,7 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
cer = None
n_characters = None
char_diff_report = ""
word_diff_report = ""

for k, gt in enumerate(os.listdir(gt_dir)):
# Find a match by replacing the suffix
Expand All @@ -65,16 +66,14 @@ def process(gt_dir, ocr_dir, report_prefix, *, metrics=True):
wer = 9999; n_words = 0

char_diff_report += gen_diff_report(
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
gt_text, ocr_text, css_prefix="l{0}-c".format(k), joiner="", none="·"
)

# TODO
# gt_words = words_normalized(gt_text)
# ocr_words = words_normalized(ocr_text)
# word_diff_report = gen_diff_report(
# gt_words, ocr_words, css_prefix="w", joiner=" ", none="⋯"
# )
word_diff_report = "TODO"
gt_words = words_normalized(gt_text)
ocr_words = words_normalized(ocr_text)
word_diff_report += gen_diff_report(
gt_words, ocr_words, css_prefix="l{0}-w".format(k), joiner=" ", none="⋯"
)


# XXX this is a copy from cli.py
Expand Down

0 comments on commit cb2be96

Please sign in to comment.