Unstructured-IO · christinestraub · Jun 3, 2024 · May 31, 2024 · May 31, 2024 · May 31, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.14.4-dev6
+## 0.14.4
 
 ### Enhancements
 
@@ -12,6 +12,7 @@
 
 ### Fixes
 
+* **Handle unrecognized tables in `UnstructuredTableTransformerModel`** When a table is not recognized, the `element.metadata.text_as_html` attribute is now set to an empty string.
 * **Remove root handlers in ingest logger**. Removes root handlers in ingest loggers to ensure secrets aren't accidentally exposed in Colab notebooks.
 * **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
 * **Clarified dependence on particular version of `python-docx`** Pinned `python-docx` version to ensure a particular method `unstructured` uses is included.

diff --git a/unstructured/__version__.py b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.14.4-dev6"  # pragma: no cover
+__version__ = "0.14.4"  # pragma: no cover
diff --git a/unstructured/partition/pdf_image/ocr.py b/unstructured/partition/pdf_image/ocr.py
@@ -280,7 +280,8 @@ def supplement_element_with_table_extraction(
             cropped_image, ocr_tokens=table_tokens, result_format="cells"
         )
 
-        text_as_html = cells_to_html(tatr_cells)
+        # NOTE(christine): `tatr_cells == ""` means that the table was not recognized
+        text_as_html = "" if tatr_cells == "" else cells_to_html(tatr_cells)
         element.text_as_html = text_as_html
 
         if env_config.EXTRACT_TABLE_AS_CELLS:
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-__version__ = "0.14.4-dev6"  # pragma: no cover
		+__version__ = "0.14.4"  # pragma: no cover