Skip to content

Commit

Permalink
Add backward compatibility for metric calculation (#3798)
Browse files Browse the repository at this point in the history
Co-authored-by: cragwolfe <crag@unstructured.io>
  • Loading branch information
plutasnyy and cragwolfe authored Nov 26, 2024
1 parent e48d79e commit 0fe6ac6
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 3 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## 0.16.8

### Enhancements
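- **Metrics: Weighted table average is optional** `TableStructureMetricsCalculator` now accepts `weighted_average` and `include_false_positives` options. Both default to `True`, so existing metric calculations are unchanged unless an option is explicitly disabled.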

### Features

### Fixes

## 0.16.7

### Enhancements
Expand All @@ -7,6 +16,7 @@

### Fixes


## 0.16.6

### Enhancements
Expand Down
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.16.7" # pragma: no cover
__version__ = "0.16.8" # pragma: no cover
15 changes: 13 additions & 2 deletions unstructured/metrics/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ class TableStructureMetricsCalculator(BaseMetricsCalculator):
"""

cutoff: Optional[float] = None
weighted_average: bool = True
include_false_positives: bool = True

def __post_init__(self):
super().__post_init__()
Expand Down Expand Up @@ -287,11 +289,20 @@ def _generate_dataframes(self, rows):

df = pd.DataFrame(rows, columns=headers)
df["_table_weights"] = df["total_tables"]
# we give false positive tables a 1 table worth of weight in computing table level acc
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1

if self.include_false_positives:
# we give false positive tables a 1 table worth of weight in computing table level acc
df["_table_weights"][df.total_tables.eq(0) & df.total_predicted_tables.gt(0)] = 1

# filter down to only those with actual and/or predicted tables
has_tables_df = df[df["_table_weights"] > 0]

if not self.weighted_average:
# for all non zero elements assign them value 1
df["_table_weights"] = df["_table_weights"].apply(
lambda table_weight: 1 if table_weight != 0 else 0
)

if has_tables_df.empty:
agg_df = pd.DataFrame(
[[metric, None, None, None, 0] for metric in self.supported_metric_names]
Expand Down

0 comments on commit 0fe6ac6

Please sign in to comment.