Skip to content

Commit

Permalink
fix: Fixed page synthesis for characters out of latin-1 (#496)
Browse files Browse the repository at this point in the history
  • Loading branch information
fg-mindee authored Sep 28, 2021
1 parent 76500ee commit 6517000
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion doctr/utils/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from copy import deepcopy
import numpy as np
import cv2
+from unidecode import unidecode
from typing import Tuple, List, Dict, Any, Union, Optional

from .common_types import BoundingBox, RotatedBbox
Expand Down Expand Up @@ -279,7 +280,11 @@ def synthesize_page(
img = Image.new('RGB', (xmax - xmin, ymax - ymin), color=(255, 255, 255))
d = ImageDraw.Draw(img)
# Draw in black the value of the word
-d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
+try:
+    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
+except UnicodeEncodeError:
+    # When character cannot be encoded, use its unidecode version
+    d.text((0, 0), unidecode(word["value"]), font=font, fill=(0, 0, 0))

# Colorize if draw_proba
if draw_proba:
Expand Down

0 comments on commit 6517000

Please sign in to comment.