Skip to content

Commit

Permalink
more fixes for font sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
Soeb Hussain committed Nov 26, 2024
1 parent 41ff140 commit 5782305
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lib/sycamore/sycamore/transforms/detr_partitioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def _supplement_text(inferred: list[Element], text: list[Element], threshold: fl
i.tokens = [{"text": elem.text_representation, "bbox": elem.bbox} for elem in matches]

i.data["text_representation"] = " ".join(full_text)
- i.properties["font_size"] = sum(font_sizes) / len(font_sizes) if font_sizes else 0.0
+ i.properties["font_size"] = sum(font_sizes) / len(font_sizes) if font_sizes else None
return inferred + unmatched

def partition_pdf(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def get_text(self, image: Image.Image) -> tuple[str, Optional[float]]:
out_list.append(text)
font_sizes.append(res[0][2][1] - res[0][0][1])
val = " ".join(out_list)
- avg_font_size = sum(font_sizes) / len(font_sizes) if font_sizes else 0.0
+ avg_font_size = sum(font_sizes) / len(font_sizes) if font_sizes else None
return val, avg_font_size

def get_boxes_and_text(self, image: Image.Image) -> list[dict[str, Any]]:
Expand Down

0 comments on commit 5782305

Please sign in to comment.