From 1ed9c43c5f1778362c6cd2088140d876481acc20 Mon Sep 17 00:00:00 2001 From: liamdugan Date: Wed, 11 Sep 2024 15:36:08 -0400 Subject: [PATCH 1/4] Fixed bug loading dataframe with pre-existing scores column --- raid/evaluate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/raid/evaluate.py b/raid/evaluate.py index 6325444..576aea4 100644 --- a/raid/evaluate.py +++ b/raid/evaluate.py @@ -7,6 +7,10 @@ def load_detection_result(df, results): # Load the dataframe and read in the scores scores_df = pd.DataFrame.from_records(results) + # If df has a pre-existing score column, remove it before merging + if 'score' in df.columns: + df = df.drop(columns=['score']) + # Merge dataframes based on the id and validate that ids are unique return df.join(scores_df.set_index("id"), on="id", validate="one_to_one") From d2a47ed19f8b51bb7d68301b895f8b238ce9e604 Mon Sep 17 00:00:00 2001 From: liamdugan Date: Wed, 11 Sep 2024 15:37:51 -0400 Subject: [PATCH 2/4] Code style --- raid/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raid/evaluate.py b/raid/evaluate.py index 576aea4..4c0d955 100644 --- a/raid/evaluate.py +++ b/raid/evaluate.py @@ -8,8 +8,8 @@ def load_detection_result(df, results): scores_df = pd.DataFrame.from_records(results) # If df has a pre-existing score column, remove it before merging - if 'score' in df.columns: - df = df.drop(columns=['score']) + if "score" in df.columns: + df = df.drop(columns=["score"]) # Merge dataframes based on the id and validate that ids are unique return df.join(scores_df.set_index("id"), on="id", validate="one_to_one") From d352aaf7a22e77755ef69ca7f5eed591b002303d Mon Sep 17 00:00:00 2001 From: liamdugan Date: Wed, 11 Sep 2024 15:38:14 -0400 Subject: [PATCH 3/4] Bump version --- raid/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raid/_version.py b/raid/_version.py index 034f46c..6526deb 100644 --- a/raid/_version.py +++ b/raid/_version.py @@ -1 +1 @@ -__version__ = "0.0.6" +__version__ = "0.0.7" From d5345aee07a48d1ba36070c2e246f6dd25ee7d1c Mon Sep 17 00:00:00 2001 From: liamdugan Date: Wed, 11 Sep 2024 16:11:13 -0400 Subject: [PATCH 4/4] run_detection no longer edits dataframe in place --- raid/detect.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/raid/detect.py b/raid/detect.py index cd4c5a2..f76bb78 100644 --- a/raid/detect.py +++ b/raid/detect.py @@ -1,9 +1,12 @@ def run_detection(f, df): + # Make a copy of the IDs of the original dataframe to avoid editing in place + scores_df = df[["id"]].copy() + # Run the detector function on the dataset and put output in score column - df["score"] = f(df["generation"]) + scores_df["score"] = f(df["generation"]) # Convert scores and ids to dict in 'records' format for seralization # e.g. [{'id':'...', 'score':0}, {'id':'...', 'score':1}, ...] - results = df[["id", "score"]].to_dict(orient="records") + results = scores_df[["id", "score"]].to_dict(orient="records") return results