hed-standard · VisLab · Jun 13, 2024 · Jun 12, 2024 · Jun 12, 2024 · Jun 12, 2024
diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
@@ -1,6 +1,5 @@
 """ A map of column value keys into new column values. """
 
-
 import pandas as pd
 from hed.errors.exceptions import HedFileError
 from hed.tools.util import data_util
@@ -18,6 +17,7 @@ class KeyMap:
     The remapping does not support other types of columns.
 
     """
+
     def __init__(self, key_cols, target_cols=None, name=''):
         """ Information for remapping columns of tabular files.
 
@@ -128,27 +128,36 @@ def remap(self, data):
         return df_new, missing_indices
 
     def _remap(self, df):
-        """ Utility method that iterates through dataframes to do the remapping.
+        """ Utility method that does the remapping, updating the target columns of df in place.
 
         Parameters:
             df (DataFrame):    DataFrame in which to perform the mapping.
 
         Returns:
             list:  The row numbers that had no correspondence in the mapping.
-
         """
+        key_series = df.apply(lambda row: data_util.get_row_hash(row, self.key_cols), axis=1)
+        # key_series now holds index_label: hash of the key columns for each row in the dataframe
+
+        # Add a column containing the mapped index for each row
+        map_series = pd.Series(self.map_dict)  # map_series is hash:row_index for each entry in the map_dict index
+        key_values = key_series.map(map_series)  # key_values is df_row_number:map_dict_index
+        # e.g. a key_value entry of 0:79 means row 0 maps to row 79 in the map_dict
+
+        # Add the map_dict index to a copy of df (merged_df) as a new column "key_value"
+        merged_df = df.assign(key_value=key_values.values)
+
+        # Left-merge the col_map rows onto merged_df, matching "key_value" to the col_map index ("_new" suffix on clashes)
+        remapped_df = pd.merge(merged_df, self.col_map, left_on='key_value', right_index=True,
+                               suffixes=('', '_new'), how='left').fillna("n/a")
+
+        # Override the original columns with our newly calculated ones
+        for col in self.target_cols:
+            df[col] = remapped_df[col + '_new']
+
+        # Finally calculate missing indices
+        missing_indices = key_series.index[key_values.isna()].tolist()
 
-        missing_indices = []
-        for index, row in df.iterrows():
-            key = data_util.get_row_hash(row, self.key_cols)
-            key_value = self.map_dict.get(key, None)
-            if key_value is not None:
-                result = self.col_map.iloc[key_value]
-                row[self.target_cols] = result[self.target_cols].values
-                new_index = df.index.get_loc(index)  # In case index and location don't agree.
-                df.iloc[new_index] = row
-            else:
-                missing_indices.append(index)
         return missing_indices
 
     def resort(self):