Skip to content

Commit

Permalink
feat: allow providing settings_file, training_file paths for cases where script is in a python package
Browse files Browse the repository at this point in the history
  • Loading branch information
ClimenteA committed Jul 26, 2021
1 parent a4abc25 commit d92d0c2
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions pandas_dedupe/gazetteer_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _cluster(deduper, clean_data, messy_data, threshold, canonicalize):


def gazetteer_dataframe(clean_data, messy_data, field_properties, canonicalize=False,
config_name="gazetteer_dataframe", update_model=False, threshold=0.3,
config_name="gazetteer_dataframe", settings_file=None, training_file=None, update_model=False, threshold=0.3,
sample_size=1, n_cores=None):
"""Deduplicates a dataframe given fields of interest.
Parameters
Expand Down Expand Up @@ -241,10 +241,10 @@ def gazetteer_dataframe(clean_data, messy_data, field_properties, canonicalize=F
attributes of the record.
"""
# Import Data
config_name = config_name.replace(" ", "_")

settings_file = config_name + '_learned_settings'
training_file = config_name + '_training.json'
if {settings_file, training_file} == {None}:
config_name = config_name.replace(" ", "_")
settings_file = config_name + '_learned_settings'
training_file = config_name + '_training.json'

print('Importing data ...')
assert type(clean_data)==pd.core.frame.DataFrame, 'Please provide a gazette in pandas dataframe format'
Expand Down

0 comments on commit d92d0c2

Please sign in to comment.