From 6e75f0331cfd0a451d8b17cdfa9e91bfd376e02f Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sun, 25 Feb 2024 17:17:47 -0500 Subject: [PATCH 1/7] Allow simple fields to have JSON to support more field types --- workbench_fields.py | 377 +++++++++++++++++--------------------------- 1 file changed, 141 insertions(+), 236 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index 54df2b1..f8e4a44 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -57,84 +57,40 @@ def create(self, config, field_definitions, entity, row, field_name): text_format = config["text_format_id"] id_field = row.get(config.get("id_field", "not_applicable"), "not_applicable") - # Cardinality is unlimited. - if field_definitions[field_name]["cardinality"] == -1: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - subvalues = self.dedupe_values(subvalues) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - else: - row[field_name] = truncate_csv_value( - field_name, id_field, field_definitions[field_name], row[field_name] + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + if field_definitions[field_name]["cardinality"] != -1: + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + id_field, + field_definitions[field_name]["cardinality"], ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + subvalues = subvalues[: field_definitions[field_name]["cardinality"]] + for subvalue in subvalues: + subvalue = truncate_csv_value( + field_name, id_field, field_definitions[field_name], subvalue + ) + if ( + "formatted_text" in field_definitions[field_name] + and field_definitions[field_name]["formatted_text"] is True + ): + json_str = self.get_json(subvalue) + if json_str is False: + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = [{"value": row[field_name]}] - - # Cardinality has a limit, including 1. - else: - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int(field_definitions[field_name]["cardinality"]): - log_field_cardinality_violation( - field_name, - id_field, - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[: field_definitions[field_name]["cardinality"]] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, id_field, field_definitions[field_name], subvalue - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append({"value": subvalue, "format": text_format}) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values + field_values.append(json_str) else: - row[field_name] = truncate_csv_value( - field_name, id_field, field_definitions[field_name], row[field_name] - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + json_str = self.get_json(subvalue) + if json_str is False: + field_values.append({"value": subvalue}) else: - entity[field_name] = [{"value": row[field_name]}] - + field_values.append(json_str) + entity[field_name] = field_values return entity def update( @@ -186,212 +142,154 @@ def update( # Cardinality has a limit. if field_definitions[field_name]["cardinality"] > 0: if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - entity[field_name].append( - {"value": subvalue, "format": text_format} - ) - else: - entity[field_name].append({"value": subvalue}) - entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = entity[field_name][ - : field_definitions[field_name]["cardinality"] - ] - else: - row[field_name] = self.remove_invalid_values( - config, field_definitions, field_name, row[field_name] - ) - row[field_name] = truncate_csv_value( + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) + json_str = self.get_json(subvalue) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name].append( - {"value": row[field_name], "format": text_format} - ) + if json_str is False: + entity[field_name].append( + { + "value": subvalue, + "format": text_format, + } + ) + else: + entity[field_name].append(json_str) else: - entity[field_name].append({"value": row[field_name]}) - entity[field_name] = self.dedupe_values(entity[field_name]) - if len(entity[field_name]) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - entity[field_name] = entity[field_name][ - : field_definitions[field_name]["cardinality"] - ] + if json_str is False: + entity[field_name].append({"value": subvalue}) + else: + entity[field_name].append(json_str) + + entity[field_name] = self.dedupe_values(entity[field_name]) + if len(entity[field_name]) > int( + field_definitions[field_name]["cardinality"] + ): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], + ) + entity[field_name] = entity[field_name][ + : field_definitions[field_name]["cardinality"] + ] if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + subvalues = self.dedupe_values(subvalues) + if len(subvalues) > int(field_definitions[field_name]["cardinality"]): + log_field_cardinality_violation( + field_name, + row[entity_id_field], + field_definitions[field_name]["cardinality"], ) - subvalues = self.dedupe_values(subvalues) - if len(subvalues) > int( - field_definitions[field_name]["cardinality"] - ): - log_field_cardinality_violation( - field_name, - row[entity_id_field], - field_definitions[field_name]["cardinality"], - ) - subvalues = subvalues[ - : field_definitions[field_name]["cardinality"] - ] - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - field_values = self.dedupe_values(field_values) - entity[field_name] = field_values - else: - row[field_name] = truncate_csv_value( + subvalues = subvalues[ + : field_definitions[field_name]["cardinality"] + ] + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) + json_str = self.get_json(subvalue) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + if json_str is False: + field_values.append( + { + "value": subvalue, + "format": text_format, + } + ) + else: + field_values.append(json_str) else: - entity[field_name] = [{"value": row[field_name]}] + if json_str is False: + field_values.append({"value": subvalue}) + else: + field_values.append(json_str) + field_values = self.dedupe_values(field_values) + entity[field_name] = field_values # Cardinatlity is unlimited. else: if config["update_mode"] == "append": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - entity[field_name] = entity_field_values + field_values - entity[field_name] = self.dedupe_values(entity[field_name]) - else: - row[field_name] = truncate_csv_value( + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = entity_field_values + [ - {"value": row[field_name], "format": text_format} - ] + field_values.append({"value": subvalue, "format": text_format}) else: - entity[field_name] = entity_field_values + [ - {"value": row[field_name]} - ] - entity[field_name] = self.dedupe_values(entity[field_name]) + field_values.append({"value": subvalue}) + entity[field_name] = entity_field_values + field_values + entity[field_name] = self.dedupe_values(entity[field_name]) + if config["update_mode"] == "replace": - if config["subdelimiter"] in row[field_name]: - field_values = [] - subvalues = row[field_name].split(config["subdelimiter"]) - subvalues = self.remove_invalid_values( - config, field_definitions, field_name, subvalues - ) - for subvalue in subvalues: - subvalue = truncate_csv_value( - field_name, - row[entity_id_field], - field_definitions[field_name], - subvalue, - ) - if ( - "formatted_text" in field_definitions[field_name] - and field_definitions[field_name]["formatted_text"] is True - ): - field_values.append( - {"value": subvalue, "format": text_format} - ) - else: - field_values.append({"value": subvalue}) - entity[field_name] = field_values - entity[field_name] = self.dedupe_values(entity[field_name]) - else: - row[field_name] = truncate_csv_value( + field_values = [] + subvalues = row[field_name].split(config["subdelimiter"]) + subvalues = self.remove_invalid_values( + config, field_definitions, field_name, subvalues + ) + for subvalue in subvalues: + subvalue = truncate_csv_value( field_name, row[entity_id_field], field_definitions[field_name], - row[field_name], + subvalue, ) + json_str = self.get_json(subvalue) if ( "formatted_text" in field_definitions[field_name] and field_definitions[field_name]["formatted_text"] is True ): - entity[field_name] = [ - {"value": row[field_name], "format": text_format} - ] + if json_str is False: + field_values.append( + { + "value": subvalue, + "format": text_format, + } + ) + else: + field_values.append(json_str) else: - entity[field_name] = [{"value": row[field_name]}] + if json_str is False: + field_values.append({"value": subvalue}) + else: + field_values.append(json_str) + entity[field_name] = field_values + entity[field_name] = self.dedupe_values(entity[field_name]) return entity @@ -504,6 +402,13 @@ def serialize(self, config, field_definitions, field_name, field_data): else: return subvalues[0] + def get_json(self, json_str): + try: + j = json.loads(json_str) + return j + except ValueError: + return False + class GeolocationField: """Functions for handling fields with 'geolocation' Drupal field data type. From 127ae20627f4d0d472c4a7db97224c3694ff9959 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 11 Nov 2024 12:03:13 -0500 Subject: [PATCH 2/7] dedupe --- workbench_fields.py | 1 + 1 file changed, 1 insertion(+) diff --git a/workbench_fields.py b/workbench_fields.py index 840cb3c..636b124 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -102,6 +102,7 @@ def create(self, config, field_definitions, entity, row, field_name): field_values.append({"value": subvalue}) else: field_values.append(json_str) + field_values = self.dedupe_values(field_values) entity[field_name] = field_values return entity From 27dd6d8493431158d75399ff1af687300d43d922 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 11 Nov 2024 12:05:25 -0500 Subject: [PATCH 3/7] fixup --- workbench_fields.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/workbench_fields.py b/workbench_fields.py index 636b124..e21dad6 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -179,7 +179,7 @@ def update( {"value": subvalue, "format": text_format} ) else: - field_values.append(json_str) + entity[field_name].append(json_str) else: if field_definitions[field_name][ "field_type" @@ -224,9 +224,7 @@ def update( and field_definitions[field_name]["formatted_text"] is True ): if json_str is False: - entity[field_name].append( - {"value": subvalue, "format": text_format} - ) + field_values.append({"value": subvalue, "format": text_format}) else: field_values.append(json_str) else: @@ -239,9 +237,9 @@ def update( ] == "float" and value_is_numeric(subvalue, allow_decimals=True): subvalue = float(subvalue) if json_str is False: - entity[field_name].append({"value": subvalue}) + field_values.append({"value": subvalue}) else: - entity[field_name].append(json_str) + field_values.append(json_str) field_values = self.dedupe_values(field_values) entity[field_name] = field_values From a93275d423e612f9ada397843bbfb289426cc7cb Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 11 Nov 2024 12:18:29 -0500 Subject: [PATCH 4/7] handle numerical values --- workbench_fields.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workbench_fields.py b/workbench_fields.py index e21dad6..3abe8ef 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -388,11 +388,15 @@ def serialize(self, config, field_definitions, field_name, field_data): return subvalues[0] def get_json(self, json_str): + if value_is_numeric(json_str, allow_decimals=True): + return False + try: j = json.loads(json_str) return j except ValueError: return False + return False class GeolocationField: From 6f1287ce9f2e9c34c66695e9f7d163c08ff7dfbe Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Mon, 11 Nov 2024 12:19:50 -0500 Subject: [PATCH 5/7] fixup --- workbench_fields.py | 1 - 1 file changed, 1 deletion(-) diff --git a/workbench_fields.py b/workbench_fields.py index 3abe8ef..2ff3f1c 100644 --- a/workbench_fields.py +++ b/workbench_fields.py @@ -396,7 +396,6 @@ def get_json(self, json_str): return j except ValueError: return False - return False class GeolocationField: From f68cb8f8f31848715a0b9bb3130e7dcb54346a6e Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Thu, 14 Nov 2024 11:43:16 -0500 Subject: [PATCH 6/7] merge in #849 --- workbench | 1 + 1 file changed, 1 insertion(+) diff --git a/workbench b/workbench index 9d20a13..061717c 100755 --- a/workbench +++ b/workbench @@ -181,6 +181,7 @@ def create(): if "field_member_of" in row.keys() and ( len(row["field_member_of"]) > 0 and value_is_numeric(row["field_member_of"]) is False + and config["subdelimiter"] not in row["field_member_of"] ): field_member_of_value_for_message = copy.copy(row["field_member_of"]) row["field_member_of"] = get_nid_from_url_alias( From 6173b868e2fabe4dbbd6869e46ae0a259b80b7f1 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Wed, 11 Dec 2024 15:47:47 -0500 Subject: [PATCH 7/7] m4v --- WorkbenchConfig.py | 1 + 1 file changed, 1 insertion(+) diff --git a/WorkbenchConfig.py b/WorkbenchConfig.py index 4781727..cf188e8 100644 --- a/WorkbenchConfig.py +++ b/WorkbenchConfig.py @@ -156,6 +156,7 @@ def get_media_types(self): "webm", "ogv", "mpeg", + "m4v", ] }, {"extracted_text": ["txt"]},