Skip to content

Commit

Permalink
Fix F1 auto-threshold to choose best largest confidence (#2371)
Browse files Browse the repository at this point in the history
* Fix F1 auto-threshold to choose best largest confidence

* Update license notice

* Update change log

---------
Signed-off-by: Songki Choi <songki.choi@intel.com>
  • Loading branch information
goodsong81 authored Jul 18, 2023
1 parent 48989b2 commit e780cca
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 20 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@

All notable changes to this project will be documented in this file.

## \[v1.5.0\]

### New features

-

### Enhancements

-

### Bug fixes

- Fix F1 auto-threshold to choose best largest confidence (<https://github.com/openvinotoolkit/training_extensions/pull/2371>)

### Known issues

- OpenVINO(==2023.0) IR inference is not working well on 2-stage models (e.g. Mask-RCNN) exported from torch==1.13.1

## \[v1.4.0\]

### New features
Expand Down
8 changes: 3 additions & 5 deletions src/otx/api/usecases/evaluation/f_measure.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
"""This module contains the f-measure performance provider class."""

# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2021-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#


import logging
from typing import Dict, List, Optional, Tuple

Expand Down Expand Up @@ -363,7 +361,7 @@ def get_results_per_confidence(
result.f_measure_curve[class_name].append(result_point[class_name].f_measure)
result.precision_curve[class_name].append(result_point[class_name].precision)
result.recall_curve[class_name].append(result_point[class_name].recall)
if all_classes_f_measure > result.best_f_measure:
if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure:
result.best_f_measure = all_classes_f_measure
result.best_threshold = confidence_threshold
return result
Expand Down Expand Up @@ -417,7 +415,7 @@ def get_results_per_nms(
result.precision_curve[class_name].append(result_point[class_name].precision)
result.recall_curve[class_name].append(result_point[class_name].recall)

if all_classes_f_measure >= result.best_f_measure:
if all_classes_f_measure > 0.0 and all_classes_f_measure >= result.best_f_measure:
result.best_f_measure = all_classes_f_measure
result.best_threshold = nms_threshold
return result
Expand Down
22 changes: 7 additions & 15 deletions tests/unit/api/usecases/evaluation/test_f_measure.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
# Copyright (C) 2020-2021 Intel Corporation
# Copyright (C) 2020-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

import datetime
from typing import cast
Expand Down Expand Up @@ -962,7 +952,7 @@ def test_f_measure_calculator_get_results_per_confidence(self):
# Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is more
# than best f-measure in results_per_confidence
expected_results_per_confidence = _AggregatedResults(["class_1", "class_2"])
for confidence_threshold in np.arange(*[0.6, 0.9]):
for confidence_threshold in np.arange(*[0.6, 0.9, 0.1]):
result_point = f_measure_calculator.evaluate_classes(
classes=["class_1", "class_2"],
iou_threshold=0.7,
Expand All @@ -978,7 +968,7 @@ def test_f_measure_calculator_get_results_per_confidence(self):

actual_results_per_confidence = f_measure_calculator.get_results_per_confidence(
classes=["class_1", "class_2"],
confidence_range=[0.6, 0.9],
confidence_range=[0.6, 0.9, 0.1], # arange(0.6, 0.9, 0.1)
iou_threshold=0.7,
)
assert actual_results_per_confidence.all_classes_f_measure_curve == (
Expand All @@ -987,7 +977,9 @@ def test_f_measure_calculator_get_results_per_confidence(self):
assert actual_results_per_confidence.f_measure_curve == expected_results_per_confidence.f_measure_curve
assert actual_results_per_confidence.recall_curve == expected_results_per_confidence.recall_curve
assert actual_results_per_confidence.best_f_measure == 0.5454545454545453
assert actual_results_per_confidence.best_threshold == 0.6
# 0.6 -> 0.54, 0.7 -> 0.54, 0.8 -> 0.54, 0.9 -> 0.44
# Best "LARGEST" threshold should be 0.8 (considering numerical error)
assert abs(actual_results_per_confidence.best_threshold - 0.8) < 0.001
# Check "_AggregatedResults" object returned by "get_results_per_confidence" when All Classes f-measure is less
# than best f-measure in results_per_confidence
actual_results_per_confidence = f_measure_calculator.get_results_per_confidence(
Expand Down

0 comments on commit e780cca

Please sign in to comment.