From 1ed9c43c5f1778362c6cd2088140d876481acc20 Mon Sep 17 00:00:00 2001
From: liamdugan <ldugan@seas.upenn.edu>
Date: Wed, 11 Sep 2024 15:36:08 -0400
Subject: [PATCH 1/4] Fixed bug loading dataframe with pre-existing score
 column

---
 raid/evaluate.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/raid/evaluate.py b/raid/evaluate.py
index 6325444..576aea4 100644
--- a/raid/evaluate.py
+++ b/raid/evaluate.py
@@ -7,6 +7,10 @@ def load_detection_result(df, results):
     # Load the dataframe and read in the scores
     scores_df = pd.DataFrame.from_records(results)
 
+    # If df has a pre-existing score column, remove it before merging
+    if 'score' in df.columns:
+        df = df.drop(columns=['score'])
+
     # Merge dataframes based on the id and validate that ids are unique
     return df.join(scores_df.set_index("id"), on="id", validate="one_to_one")
 

From d2a47ed19f8b51bb7d68301b895f8b238ce9e604 Mon Sep 17 00:00:00 2001
From: liamdugan <ldugan@seas.upenn.edu>
Date: Wed, 11 Sep 2024 15:37:51 -0400
Subject: [PATCH 2/4] Code style

---
 raid/evaluate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/raid/evaluate.py b/raid/evaluate.py
index 576aea4..4c0d955 100644
--- a/raid/evaluate.py
+++ b/raid/evaluate.py
@@ -8,8 +8,8 @@ def load_detection_result(df, results):
     scores_df = pd.DataFrame.from_records(results)
 
     # If df has a pre-existing score column, remove it before merging
-    if 'score' in df.columns:
-        df = df.drop(columns=['score'])
+    if "score" in df.columns:
+        df = df.drop(columns=["score"])
 
     # Merge dataframes based on the id and validate that ids are unique
     return df.join(scores_df.set_index("id"), on="id", validate="one_to_one")

From d352aaf7a22e77755ef69ca7f5eed591b002303d Mon Sep 17 00:00:00 2001
From: liamdugan <ldugan@seas.upenn.edu>
Date: Wed, 11 Sep 2024 15:38:14 -0400
Subject: [PATCH 3/4] Bump version

---
 raid/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/raid/_version.py b/raid/_version.py
index 034f46c..6526deb 100644
--- a/raid/_version.py
+++ b/raid/_version.py
@@ -1 +1 @@
-__version__ = "0.0.6"
+__version__ = "0.0.7"

From d5345aee07a48d1ba36070c2e246f6dd25ee7d1c Mon Sep 17 00:00:00 2001
From: liamdugan <ldugan@seas.upenn.edu>
Date: Wed, 11 Sep 2024 16:11:13 -0400
Subject: [PATCH 4/4] run_detection no longer edits dataframe in place

---
 raid/detect.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/raid/detect.py b/raid/detect.py
index cd4c5a2..f76bb78 100644
--- a/raid/detect.py
+++ b/raid/detect.py
@@ -1,9 +1,12 @@
 def run_detection(f, df):
+    # Make a copy of the IDs of the original dataframe to avoid editing in place
+    scores_df = df[["id"]].copy()
+
     # Run the detector function on the dataset and put output in score column
-    df["score"] = f(df["generation"])
+    scores_df["score"] = f(df["generation"])
 
     # Convert scores and ids to dict in 'records' format for seralization
     # e.g. [{'id':'...', 'score':0}, {'id':'...', 'score':1}, ...]
-    results = df[["id", "score"]].to_dict(orient="records")
+    results = scores_df[["id", "score"]].to_dict(orient="records")
 
     return results