diff --git a/raid/_version.py b/raid/_version.py
index 034f46c..6526deb 100644
--- a/raid/_version.py
+++ b/raid/_version.py
@@ -1 +1 @@
-__version__ = "0.0.6"
+__version__ = "0.0.7"
diff --git a/raid/detect.py b/raid/detect.py
index cd4c5a2..f76bb78 100644
--- a/raid/detect.py
+++ b/raid/detect.py
@@ -1,9 +1,12 @@
 def run_detection(f, df):
+    # Make a copy of the IDs of the original dataframe to avoid editing in place
+    scores_df = df[["id"]].copy()
+
     # Run the detector function on the dataset and put output in score column
-    df["score"] = f(df["generation"])
+    scores_df["score"] = f(df["generation"])
 
     # Convert scores and ids to dict in 'records' format for seralization
     # e.g. [{'id':'...', 'score':0}, {'id':'...', 'score':1}, ...]
-    results = df[["id", "score"]].to_dict(orient="records")
+    results = scores_df[["id", "score"]].to_dict(orient="records")
 
     return results
diff --git a/raid/evaluate.py b/raid/evaluate.py
index 6325444..4c0d955 100644
--- a/raid/evaluate.py
+++ b/raid/evaluate.py
@@ -7,6 +7,10 @@ def load_detection_result(df, results):
     # Load the dataframe and read in the scores
     scores_df = pd.DataFrame.from_records(results)
 
+    # If df has a pre-existing score column, remove it before merging
+    if "score" in df.columns:
+        df = df.drop(columns=["score"])
+
     # Merge dataframes based on the id and validate that ids are unique
     return df.join(scores_df.set_index("id"), on="id", validate="one_to_one")
 