From c2fce233cd8d35f2b622d143e4b524426fe1dcc9 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 22 Mar 2023 17:18:18 -0500
Subject: [PATCH 1/3] Add squre bracket in column validation for spreadsheets. 
 Update error handling slightly(error list is now sorted by context always)

---
 hed/errors/__init__.py                        |  2 +-
 hed/errors/error_messages.py                  | 25 ++++-
 hed/errors/error_reporter.py                  | 71 ++++++++++---
 hed/errors/error_types.py                     |  7 ++
 hed/models/base_input.py                      | 56 +++++++----
 hed/models/sidecar.py                         |  3 +-
 hed/schema/schema_compliance.py               |  2 +-
 hed/validator/sidecar_validator.py            |  5 +-
 hed/validator/spreadsheet_validator.py        | 99 +++++++++++++++++--
 tests/errors/test_error_reporter.py           |  8 +-
 tests/models/test_base_input.py               | 48 ++-------
 tests/schema/test_convert_tags.py             |  2 +-
 tests/validator/test_onset_validator.py       |  4 +-
 tests/validator/test_spreadsheet_validator.py | 57 +++++++++++
 tests/validator/test_tag_validator_base.py    |  2 +-
 15 files changed, 298 insertions(+), 93 deletions(-)
 create mode 100644 tests/validator/test_spreadsheet_validator.py

diff --git a/hed/errors/__init__.py b/hed/errors/__init__.py
index 0583dd562..c2f58a07c 100644
--- a/hed/errors/__init__.py
+++ b/hed/errors/__init__.py
@@ -1,4 +1,4 @@
-from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string
+from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string, sort_issues
 from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings,  SidecarErrors, ValidationErrors
 from .error_types import ErrorContext, ErrorSeverity
 from .exceptions import HedExceptions, HedFileError
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
index 9ae9557f3..fffc549dd 100644
--- a/hed/errors/error_messages.py
+++ b/hed/errors/error_messages.py
@@ -6,7 +6,7 @@
 
 from hed.errors.error_reporter import hed_error, hed_tag_error
 from hed.errors.error_types import ValidationErrors, SchemaErrors, \
-    SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors
+    SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors, ColumnErrors
 
 
 @hed_tag_error(ValidationErrors.HED_UNITS_INVALID)
@@ -342,3 +342,26 @@ def onset_wrong_placeholder(tag, has_placeholder):
     if has_placeholder:
         return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one."
     return f"Onset/offset def tag {tag} should not have a placeholder, but has one."
+
+
+@hed_error(ColumnErrors.INVALID_COLUMN_REF)
+def invalid_column_ref(bad_refs):
+    return f"Bad column references found(columns do not exist): {bad_refs}"
+
+
+@hed_error(ColumnErrors.SELF_COLUMN_REF)
+def self_column_ref(self_ref):
+    return f"Column references itself: {self_ref}"
+
+
+@hed_error(ColumnErrors.NESTED_COLUMN_REF)
+def nested_column_ref(column_name, ref_column):
+    return f"Column {column_name} has a nested reference to {ref_column}.  " \
+           f"Column reference columns cannot contain other column references."
+
+
+@hed_error(ColumnErrors.MALFORMED_COLUMN_REF)
+def nested_column_ref(column_name, index, symbol):
+    return f"Column {column_name} has a malformed column reference.  Improper symbol {symbol} found at index {index}."
+
+
diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py
index 4a7fd91a9..cb1a959d5 100644
--- a/hed/errors/error_reporter.py
+++ b/hed/errors/error_reporter.py
@@ -10,6 +10,27 @@
 
 error_functions = {}
 
+# Controls if the default issue printing skips adding indentation for this context
+no_tab_context = {ErrorContext.HED_STRING, ErrorContext.SCHEMA_ATTRIBUTE}
+
+# Default sort ordering for issues list
+default_sort_list = [
+    ErrorContext.CUSTOM_TITLE,
+    ErrorContext.FILE_NAME,
+    ErrorContext.SIDECAR_COLUMN_NAME,
+    ErrorContext.SIDECAR_KEY_NAME,
+    ErrorContext.ROW,
+    ErrorContext.COLUMN,
+    ErrorContext.HED_STRING,
+    ErrorContext.SCHEMA_SECTION,
+    ErrorContext.SCHEMA_TAG,
+    ErrorContext.SCHEMA_ATTRIBUTE,
+]
+
+# ErrorContext which is expected to be int based.
+int_sort_list = [
+    ErrorContext.ROW,
+]
 
 def _register_error_function(error_type, wrapper_func):
     if error_type in error_functions:
@@ -153,19 +174,23 @@ def __init__(self, check_for_warnings=True):
         self.error_context = []
         self._check_for_warnings = check_for_warnings
 
-    def push_error_context(self, context_type, context, increment_depth_after=True):
+    def push_error_context(self, context_type, context):
         """ Push a new error context to narrow down error scope.
 
         Parameters:
             context_type (ErrorContext): A value from ErrorContext representing the type of scope.
             context (str, int, or HedString): The main value for the context_type.
-            increment_depth_after (bool): If True, add an extra tab to any subsequent errors in the scope.
 
         Notes:
             The context depends on the context_type. For ErrorContext.FILE_NAME this would be the actual filename.
 
         """
-        self.error_context.append((context_type, context, increment_depth_after))
+        if context is None:
+            if context_type in int_sort_list:
+                context = 0
+            else:
+                context_type = ""
+        self.error_context.append((context_type, context))
 
     def pop_error_context(self):
         """ Remove the last scope from the error context.
@@ -292,8 +317,8 @@ def _add_context_to_errors(error_object, error_context_to_add):
         """
         if error_object is None:
             error_object = {}
-        for (context_type, context, increment_count) in error_context_to_add:
-            error_object[context_type] = (context, increment_count)
+        for (context_type, context) in error_context_to_add:
+            error_object[context_type] = context
 
         return error_object
 
@@ -330,7 +355,7 @@ def _get_tag_span_to_error_object(error_object):
         else:
             return None, None
 
-        hed_string = error_object[ErrorContext.HED_STRING][0]
+        hed_string = error_object[ErrorContext.HED_STRING]
         span = hed_string._get_org_span(source_tag)
         return span
 
@@ -385,6 +410,7 @@ def filter_issues_by_severity(issues_list, severity):
 
 def get_exception_issue_string(issues, title=None):
     """ Return a string with issues list flatted into single string, one issue per line.
+        Possibly being deprecated.
 
     Parameters:
         issues (list):  A list of strings containing issues to print.
@@ -410,6 +436,29 @@ def get_exception_issue_string(issues, title=None):
     return issue_str
 
 
+def sort_issues(issues, reverse=False):
+    """Sorts a list of issues by the error context values.
+
+    Parameters:
+        issues (list): A list of dictionaries representing the issues to be sorted.
+        reverse (bool, optional): If True, sorts the list in descending order. Default is False.
+
+    Returns:
+        list: The sorted list of issues."""
+    def _get_keys(d):
+        result = []
+        for key in default_sort_list:
+            if key in int_sort_list:
+                result.append(d.get(key, -1))
+            else:
+                result.append(d.get(key, ""))
+        return tuple(result)
+
+    issues = sorted(issues, key=_get_keys, reverse=reverse)
+
+    return issues
+
+
 def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True):
     """ Return a string with issues list flatted into single string, one per line.
 
@@ -471,7 +520,7 @@ def _get_context_from_issue(val_issue, skip_filename=True):
         if skip_filename and key == ErrorContext.FILE_NAME:
             continue
         if key.startswith("ec_"):
-            single_issue_context.append((key, *val_issue[key]))
+            single_issue_context.append((key, val_issue[key]))
 
     return single_issue_context
 
@@ -512,7 +561,7 @@ def _get_context_string(single_issue_context, last_used_context):
     """ Convert a single context list into the final human readable output form.
 
     Parameters:
-        single_issue_context (list): A list of tuples containing the context(context_type, context, increment_tab)
+        single_issue_context (list): A list of tuples containing the context(context_type, context)
         last_used_context (list): A list of tuples containing the last drawn context.
 
     Returns:
@@ -528,18 +577,18 @@ def _get_context_string(single_issue_context, last_used_context):
     tab_count = 0
     found_difference = False
     for i, context_tuple in enumerate(single_issue_context):
-        (context_type, context, increment_tab) = context_tuple
+        (context_type, context) = context_tuple
         if len(last_used_context) > i and not found_difference:
             last_drawn = last_used_context[i]
             # Was drawn, and hasn't changed.
             if last_drawn == context_tuple:
-                if increment_tab:
+                if context_type not in no_tab_context:
                     tab_count += 1
                 continue
 
         context_string += _format_single_context_string(context_type, context, tab_count)
         found_difference = True
-        if increment_tab:
+        if context_type not in no_tab_context:
             tab_count += 1
 
     tab_string = '\t' * tab_count
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index ac76f6992..66cfaecd1 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -117,3 +117,10 @@ class OnsetErrors:
     ONSET_PLACEHOLDER_WRONG = "ONSET_PLACEHOLDER_WRONG"
     ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS"
     ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
+
+
+class ColumnErrors:
+    INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
+    SELF_COLUMN_REF = "SELF_COLUMN_REF"
+    NESTED_COLUMN_REF = "NESTED_COLUMN_REF"
+    MALFORMED_COLUMN_REF = "MALFORMED_COLUMN_REF"
diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index af6249f56..f0e4209c2 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -354,33 +354,45 @@ def _dataframe_has_names(dataframe):
                 return True
         return False
 
-    def assemble(self, mapper=None):
+    def assemble(self, mapper=None, skip_square_brackets=False):
         """ Assembles the hed strings
 
         Parameters:
             mapper(ColumnMapper or None): Generally pass none here unless you want special behavior.
-
+            skip_square_brackets (bool): If True, don't plug in square bracket values into columns.
         Returns:
             Dataframe: the assembled dataframe
         """
         if mapper is None:
             mapper = self._mapper
 
+        all_columns = self._handle_transforms(mapper)
+        if skip_square_brackets:
+            return all_columns
+        transformers, _ = mapper.get_transformers()
+
+        return self._handle_square_brackets(all_columns, list(transformers))
+
+    def _handle_transforms(self, mapper):
         transformers, need_categorical = mapper.get_transformers()
-        if not transformers:
-            return self._dataframe
-        all_columns = self._dataframe
-        if need_categorical:
-            all_columns[need_categorical] = all_columns[need_categorical].astype('category')
+        if transformers:
+            all_columns = self._dataframe
+            if need_categorical:
+                all_columns[need_categorical] = all_columns[need_categorical].astype('category')
 
-        all_columns = all_columns.transform(transformers)
+            all_columns = all_columns.transform(transformers)
+
+            if need_categorical:
+                all_columns[need_categorical] = all_columns[need_categorical].astype('str')
+        else:
+            all_columns = self._dataframe
 
-        return self._insert_columns(all_columns, list(transformers.keys()))
+        return all_columns
 
     @staticmethod
-    def _find_column_refs(df):
+    def _find_column_refs(df, column_names):
         found_column_references = []
-        for column_name in df:
+        for column_name in column_names:
             df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE)
             u_vals = pd.Series([j for i in df_temp if isinstance(i, list) for j in i], dtype=str)
             u_vals = u_vals.unique()
@@ -391,21 +403,23 @@ def _find_column_refs(df):
         return found_column_references
 
     @staticmethod
-    def _insert_columns(df, known_columns=None):
-        if known_columns is None:
-            known_columns = list(df.columns)
-        possible_column_references = [f"{column_name}" for column_name in df.columns if
+    def _handle_square_brackets(df, known_columns=None):
+        """
+            Plug in square brackets with other columns
+
+            If known columns is passed, only use those columns to find or replace references.
+        """
+        if known_columns is not None:
+            column_names = list(known_columns)
+        else:
+            column_names = list(df.columns)
+        possible_column_references = [f"{column_name}" for column_name in column_names if
                                       isinstance(column_name, str) and column_name.lower() != "hed"]
-        found_column_references = BaseInput._find_column_refs(df)
+        found_column_references = BaseInput._find_column_refs(df, column_names)
 
-        invalid_replacements = [col for col in found_column_references if col not in possible_column_references]
-        if invalid_replacements:
-            # todo: This check may be moved to validation
-            raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}")
         valid_replacements = [col for col in found_column_references if col in possible_column_references]
 
         # todo: break this into a sub function(probably)
-        column_names = known_columns
         for column_name in valid_replacements:
             column_names.remove(column_name)
         saved_columns = df[valid_replacements]
diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py
index 280eba77d..958cadfba 100644
--- a/hed/models/sidecar.py
+++ b/hed/models/sidecar.py
@@ -255,8 +255,7 @@ def extract_definitions(self, hed_schema=None, error_handler=None):
         if hed_schema:
             for hed_string, column_data, _ in self.hed_string_iter(error_handler):
                 hed_string_obj = HedString(hed_string, hed_schema)
-                error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj,
-                                                 increment_depth_after=False)
+                error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
                 self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler)
                 error_handler.pop_error_context()
 
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index 84c2accbf..c0b821723 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -60,7 +60,7 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
             for attribute_name in tag_entry.attributes:
                 validator = schema_attribute_validators.get(attribute_name)
                 if validator:
-                    error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name, False)
+                    error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
                     new_issues = validator(hed_schema, tag_entry, tag_entry.attributes[attribute_name])
                     error_handler.add_context_and_filter(new_issues)
                     issues_list += new_issues
diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py
index af12005b1..daa71fb07 100644
--- a/hed/validator/sidecar_validator.py
+++ b/hed/validator/sidecar_validator.py
@@ -4,6 +4,7 @@
 from hed import HedString
 from hed import Sidecar
 from hed.models.column_metadata import ColumnMetadata
+from hed.errors.error_reporter import sort_issues
 
 
 class SidecarValidator:
@@ -49,8 +50,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
         for hed_string, column_data, position in sidecar.hed_string_iter(error_handler):
             hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict)
 
-            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj,
-                                             increment_depth_after=False)
+            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
             new_issues = hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True)
             if not new_issues:
                 new_issues = hed_validator.run_full_string_checks(hed_string_obj)
@@ -61,6 +61,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
             error_handler.pop_error_context()
 
         error_handler.pop_error_context()
+        issues = sort_issues(issues)
         return issues
 
     def validate_structure(self, sidecar, error_handler):
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index ba1f341ac..8b8aa9b1f 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -1,9 +1,12 @@
 import pandas as pd
+import re
 from hed import BaseInput
 from hed.errors import ErrorHandler, ValidationErrors, ErrorContext
+from hed.errors.error_types import ColumnErrors
 from hed.models import ColumnType
 from hed import HedString
 from hed.models.hed_string_group import HedStringGroup
+from hed.errors.error_reporter import sort_issues
 
 PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: "
 
@@ -25,6 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
 
         Parameters:
             data (BaseInput or pd.DataFrame): Input data to be validated.
+                If a dataframe, it is assumed to be assembled already.
             def_dicts(list of DefDict or DefDict): all definitions to use for validation
             name(str): The name to report errors from this file as
             error_handler (ErrorHandler): Error context to use.  Creates a new one if None
@@ -41,31 +45,32 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         # Check the structure of the input data, if it's a BaseInput
         if isinstance(data, BaseInput):
             issues += self._validate_column_structure(data, error_handler)
-            # todo ian: Add more checks here for column inserters
+            issues += self._validate_square_brackets(data.assemble(skip_square_brackets=True), error_handler)
             data = data.dataframe_a
 
         # Check the rows of the input data
         issues += self._run_checks(data, error_handler)
         error_handler.pop_error_context()
+
+        issues = sort_issues(issues)
         return issues
 
     def _run_checks(self, data, error_handler):
         issues = []
+        columns = list(data.columns)
         for row_number, text_file_row in enumerate(data.itertuples(index=False)):
             error_handler.push_error_context(ErrorContext.ROW, row_number)
             row_strings = []
             new_column_issues = []
-            # todo: make this report the correct column numbers(somehow  - it almost surely doesn't right now)
             for column_number, cell in enumerate(text_file_row):
                 if not cell or cell == "n/a":
                     continue
 
-                error_handler.push_error_context(ErrorContext.COLUMN, column_number)
+                error_handler.push_error_context(ErrorContext.COLUMN, columns[column_number])
 
                 column_hed_string = HedString(cell)
                 row_strings.append(column_hed_string)
-                error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string,
-                                                 increment_depth_after=False)
+                error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string)
                 new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False)
 
                 error_handler.add_context_and_filter(new_column_issues)
@@ -77,7 +82,7 @@ def _run_checks(self, data, error_handler):
                 continue
             else:
                 row_string = HedStringGroup(row_strings)
-                error_handler.push_error_context(ErrorContext.HED_STRING, row_string, increment_depth_after=False)
+                error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                 new_column_issues = self._hed_validator.run_full_string_checks(row_string)
 
                 error_handler.add_context_and_filter(new_column_issues)
@@ -113,3 +118,85 @@ def _validate_column_structure(self, base_input, error_handler):
                 error_handler.pop_error_context()
 
         return issues
+
+    @staticmethod
+    def _validate_column_refs(df, error_handler):
+        possible_column_references = [f"{column_name}" for column_name in df.columns if
+                                      isinstance(column_name, str) and column_name.lower() != "hed"]
+
+        issues = []
+        found_column_references = {}
+        for column_name in df:
+            matches = df[column_name].str.findall("\[([a-z_\-\s0-9]+)(?<!\[)\]", re.IGNORECASE)
+            for row_number, row in enumerate(matches):
+                for match in row:
+                    if match not in possible_column_references:
+                        error_handler.push_error_context(ErrorContext.ROW, row_number)
+                        error_handler.push_error_context(ErrorContext.COLUMN, column_name)
+                        error_handler.push_error_context(ErrorContext.HED_STRING, df[column_name][row_number])
+
+                        issues += error_handler.format_error_with_context(ColumnErrors.INVALID_COLUMN_REF,
+                                                                          match)
+                        error_handler.pop_error_context()
+                        error_handler.pop_error_context()
+                        error_handler.pop_error_context()
+
+            references = [match for sublist in matches for match in sublist]
+            if references:
+                found_column_references[column_name] = references
+            if column_name in references:
+                issues += error_handler.format_error_with_context(ColumnErrors.SELF_COLUMN_REF, column_name)
+        for column_name, refs in found_column_references.items():
+            for ref in refs:
+                if ref in found_column_references:
+                    issues += error_handler.format_error_with_context(ColumnErrors.NESTED_COLUMN_REF, column_name,
+                                                                      ref)
+        return issues
+
+    @staticmethod
+    def _invalid_bracket_locations(text):
+        first_opening = text.find('[')
+        last_closing = text.rfind(']')
+
+        invalid_location = None
+
+        if first_opening == -1 or last_closing == -1:
+            if first_opening != -1:
+                invalid_location = first_opening
+            elif last_closing != -1:
+                invalid_location = last_closing
+        else:
+            for index, char in enumerate(text[first_opening + 1:last_closing]):
+                if char == '[' or char == ']':
+                    invalid_location = index + first_opening + 1
+                    break
+
+        return invalid_location
+
+    @staticmethod
+    def _validate_missing_malformed_brackets(df, error_handler):
+        invalid_brackets_df = df.applymap(lambda x: SpreadsheetValidator._invalid_bracket_locations(str(x)))
+
+        issues = []
+        s = invalid_brackets_df.stack()
+        s = s.dropna()
+        for index, value in s.items():
+            row, column = index
+            cell_text = df[column][row]
+            error_handler.push_error_context(ErrorContext.ROW, row)
+            error_handler.push_error_context(ErrorContext.COLUMN, column)
+            error_handler.push_error_context(ErrorContext.HED_STRING, cell_text)
+            issues += error_handler.format_error_with_context(ColumnErrors.MALFORMED_COLUMN_REF, column, value,
+                                                              cell_text[value])
+            error_handler.pop_error_context()
+            error_handler.pop_error_context()
+            error_handler.pop_error_context()
+
+        return issues
+
+    def _validate_square_brackets(self, df, error_handler):
+        issues = []
+        issues += self._validate_column_refs(df, error_handler)
+        issues += self._validate_missing_malformed_brackets(df, error_handler)
+
+        return issues
\ No newline at end of file
diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py
index 28c7fbffe..40765f51b 100644
--- a/tests/errors/test_error_reporter.py
+++ b/tests/errors/test_error_reporter.py
@@ -23,11 +23,11 @@ def test_push_error_context(self):
         self.error_handler.reset_error_context()
         self.error_handler.push_error_context(ErrorContext.FILE_NAME, name)
         self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
-        self.error_handler.push_error_context(ErrorContext.COLUMN, 1)
+        self.error_handler.push_error_context(ErrorContext.COLUMN, column_name)
         error_list = self.error_handler.format_error_with_context(ValidationErrors.HED_TAG_NOT_UNIQUE, "")
         self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME])
         self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME])
-        self.assertTrue(1 in error_list[0][ErrorContext.COLUMN])
+        self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN])
         self.assertTrue(len(error_list) == 1)
         self.error_handler.reset_error_context()
 
@@ -48,12 +48,12 @@ def test_pop_error_context(self):
         self.assertTrue(len(error_list) == 1)
         self.error_handler.push_error_context(ErrorContext.FILE_NAME, name)
         self.error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
-        self.error_handler.push_error_context(ErrorContext.COLUMN, 1)
+        self.error_handler.push_error_context(ErrorContext.COLUMN, column_name)
         error_list = self.error_handler.format_error_with_context(ValidationErrors.HED_TAG_NOT_UNIQUE, "")
         self.assertTrue(len(error_list) == 1)
         self.assertTrue(name in error_list[0][ErrorContext.FILE_NAME])
         self.assertTrue(column_name in error_list[0][ErrorContext.SIDECAR_COLUMN_NAME])
-        self.assertTrue(1 in error_list[0][ErrorContext.COLUMN])
+        self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN])
         self.error_handler.pop_error_context()
         self.error_handler.pop_error_context()
         self.error_handler.pop_error_context()
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index 8404be04e..30f3714aa 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -110,7 +110,7 @@ def test_insert_columns_simple(self):
         expected_df = pd.DataFrame({
             "column1": ["Item, Event, Action"]
         })
-        result = BaseInput._insert_columns(df)
+        result = BaseInput._handle_square_brackets(df)
         pd.testing.assert_frame_equal(result, expected_df)
 
     def test_insert_columns_multiple_rows(self):
@@ -121,17 +121,9 @@ def test_insert_columns_multiple_rows(self):
         expected_df = pd.DataFrame({
             "column1": ["Item, Event, Action", "Event, Action"]
         })
-        result = BaseInput._insert_columns(df)
+        result = BaseInput._handle_square_brackets(df)
         pd.testing.assert_frame_equal(result, expected_df)
 
-    # def test_insert_columns_no_circular_reference(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["[column2], Event, Action"],
-    #         "column2": ["[column1], Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
     def test_insert_columns_multiple_columns(self):
         df = pd.DataFrame({
             "column1": ["[column2], Event, [column3], Action"],
@@ -141,17 +133,9 @@ def test_insert_columns_multiple_columns(self):
         expected_df = pd.DataFrame({
             "column1": ["Item, Event, Subject, Action"]
         })
-        result = BaseInput._insert_columns(df)
+        result = BaseInput._handle_square_brackets(df)
         pd.testing.assert_frame_equal(result, expected_df)
 
-    def test_insert_columns_invalid_column_name(self):
-        df = pd.DataFrame({
-            "column1": ["[invalid_column], Event, Action"],
-            "column2": ["Item"]
-        })
-        with self.assertRaises(ValueError):
-            result = BaseInput._insert_columns(df)
-
     def test_insert_columns_four_columns(self):
         df = pd.DataFrame({
             "column1": ["[column2], Event, [column3], Action"],
@@ -163,25 +147,9 @@ def test_insert_columns_four_columns(self):
             "column1": ["Item, Event, Subject, Action"],
             "column4": ["Data"]
         })
-        result = BaseInput._insert_columns(df)
+        result = BaseInput._handle_square_brackets(df)
         pd.testing.assert_frame_equal(result, expected_df)
 
-    # def test_insert_columns_invalid_syntax(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["column2], Event, Action"],
-    #         "column2": ["Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
-    # def test_insert_columns_no_self_reference(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["[column1], Event, Action"],
-    #         "column2": ["Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
 
 class TestCombineDataframe(unittest.TestCase):
     def test_combine_dataframe_with_strings(self):
@@ -248,7 +216,7 @@ def test_simple_column_refs(self):
             'B': ['tag3, tag4', '[col3]'],
         }
         df1 = pd.DataFrame(data1)
-        result1 = BaseInput._find_column_refs(df1)
+        result1 = BaseInput._find_column_refs(df1, df1.columns)
         expected1 = ['col1', 'col2', 'col3']
         self.assertEqual(result1, expected1)
 
@@ -257,7 +225,7 @@ def test_mixed_cases_and_patterns(self):
             'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'],
         }
         df2 = pd.DataFrame(data2)
-        result2 = BaseInput._find_column_refs(df2)
+        result2 = BaseInput._find_column_refs(df2, df2.columns)
         expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6']
         self.assertEqual(result2, expected2)
 
@@ -267,7 +235,7 @@ def test_no_column_references(self):
             'B': ['tag5, tag6', 'tag7, tag8'],
         }
         df3 = pd.DataFrame(data3)
-        result3 = BaseInput._find_column_refs(df3)
+        result3 = BaseInput._find_column_refs(df3, df3.columns)
         expected3 = []
         self.assertEqual(result3, expected3)
 
@@ -277,6 +245,6 @@ def test_incomplete_square_brackets(self):
             'B': ['tag3, [COL4', '[col5, col6]'],
         }
         df4 = pd.DataFrame(data4)
-        result4 = BaseInput._find_column_refs(df4)
+        result4 = BaseInput._find_column_refs(df4, df4.columns)
         expected4 = ['col2']
         self.assertEqual(result4, expected4)
\ No newline at end of file
diff --git a/tests/schema/test_convert_tags.py b/tests/schema/test_convert_tags.py
index ebfa134a1..c015facb6 100644
--- a/tests/schema/test_convert_tags.py
+++ b/tests/schema/test_convert_tags.py
@@ -14,7 +14,7 @@ def converter_base(self, test_strings, expected_results, expected_errors, conver
         for test_key in test_strings:
             test_string_obj = HedString(test_strings[test_key])
             error_handler = ErrorHandler()
-            error_handler.push_error_context(ErrorContext.HED_STRING, test_string_obj, increment_depth_after=False)
+            error_handler.push_error_context(ErrorContext.HED_STRING, test_string_obj)
             test_issues = test_string_obj.convert_to_canonical_forms(self.hed_schema)
             if convert_to_short:
                 string_result = test_string_obj.get_as_short()
diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py
index de46d116b..ef7e3aa54 100644
--- a/tests/validator/test_onset_validator.py
+++ b/tests/validator/test_onset_validator.py
@@ -48,7 +48,7 @@ def _test_issues_base(self, test_strings, test_issues, test_context, placeholder
         for string, expected_params, context in zip(test_strings, test_issues, test_context):
             test_string = HedString(string, self.hed_schema)
             error_handler = ErrorHandler()
-            error_handler.push_error_context(ErrorContext.HED_STRING, test_string, increment_depth_after=False)
+            error_handler.push_error_context(ErrorContext.HED_STRING, test_string)
 
             onset_issues = []
             onset_issues += validator.validate_onset_offset(test_string)
@@ -67,7 +67,7 @@ def _test_issues_no_context(self, test_strings, test_issues):
         for string, expected_params in zip(test_strings, test_issues):
             test_string = HedString(string)
             error_handler = ErrorHandler(check_for_warnings=False)
-            error_handler.push_error_context(ErrorContext.HED_STRING, test_string, increment_depth_after=False)
+            error_handler.push_error_context(ErrorContext.HED_STRING, test_string)
             onset_issues = hed_validator.validate(test_string, False)
             error_handler.add_context_and_filter(onset_issues)
             issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params)
diff --git a/tests/validator/test_spreadsheet_validator.py b/tests/validator/test_spreadsheet_validator.py
new file mode 100644
index 000000000..ef43f9bf3
--- /dev/null
+++ b/tests/validator/test_spreadsheet_validator.py
@@ -0,0 +1,57 @@
+import pandas as pd
+import unittest
+from hed import BaseInput, load_schema_version
+from hed.validator import SpreadsheetValidator
+from hed.errors import ErrorHandler, sort_issues
+from hed.errors.error_types import ColumnErrors
+
+
+
+class TestInsertColumns(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.schema = load_schema_version("8.1.0")
+        cls.validator = SpreadsheetValidator(cls.schema)
+
+    def test_insert_columns_no_nested_or_circular_reference(self):
+        df = pd.DataFrame({
+            "column1": ["[column2], Event, Action"],
+            "column2": ["[column1], Item"]
+        })
+        issues = self.validator._validate_square_brackets(df, error_handler=ErrorHandler(True))
+        self.assertEqual(issues[0]['code'], ColumnErrors.NESTED_COLUMN_REF)
+
+    def test_insert_columns_invalid_column_name(self):
+        df = pd.DataFrame({
+            "column1": ["[invalid_column], Event, Action"],
+            "column2": ["Item"]
+        })
+        issues = self.validator._validate_square_brackets(df, error_handler=ErrorHandler(True))
+        self.assertEqual(issues[0]['code'], ColumnErrors.INVALID_COLUMN_REF)
+
+    def test_insert_columns_invalid_syntax(self):
+        df = pd.DataFrame({
+            "column1": ["column2], Event, Action"],
+            "column2": ["Item"]
+        })
+        issues = self.validator._validate_square_brackets(df, error_handler=ErrorHandler(True))
+        self.assertEqual(issues[0]['code'], ColumnErrors.MALFORMED_COLUMN_REF)
+
+    def test_insert_columns_invalid_syntax2(self):
+        df = pd.DataFrame({
+            "column1": ["column2], Event, Action", "[column, Event, Action"],
+            "column2": ["Item", "Action"],
+            "column3": ["This is a [malformed [input string]] with extra [opening brackets", "[Event[Action]]"],
+        })
+        issues = self.validator._validate_square_brackets(df, error_handler=ErrorHandler(True))
+        issues = sort_issues(issues)
+        self.assertEqual(issues[0]['code'], ColumnErrors.MALFORMED_COLUMN_REF)
+        self.assertEqual(len(issues), 6)
+
+    def test_insert_columns_no_self_reference(self):
+        df = pd.DataFrame({
+            "column1": ["[column1], Event, Action"],
+            "column2": ["Item"]
+        })
+        issues = self.validator._validate_square_brackets(df, error_handler=ErrorHandler(True))
+        self.assertEqual(issues[0]['code'], ColumnErrors.SELF_COLUMN_REF)
diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py
index 37d78668c..f50c41af4 100644
--- a/tests/validator/test_tag_validator_base.py
+++ b/tests/validator/test_tag_validator_base.py
@@ -76,7 +76,7 @@ def validator_base(self, test_strings, expected_results, expected_issues, test_f
         for test_key in test_strings:
             hed_string_obj = HedString(test_strings[test_key])
             error_handler = ErrorHandler(check_for_warnings=check_for_warnings)
-            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, increment_depth_after=False)
+            error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj)
             test_issues = []
             if self.compute_forms:
                 test_issues += hed_string_obj.convert_to_canonical_forms(hed_schema)

From 25f6fe3c0f4bc68163b35df41e96db8c33978f4e Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 22 Mar 2023 20:23:19 -0500
Subject: [PATCH 2/3] Further fix shrinking/expanding(with some test cases). 
 Start updating errors to spec names.

---
 hed/errors/error_messages.py          |  35 +++--
 hed/errors/error_types.py             |  20 ++-
 hed/models/definition_dict.py         |   2 +
 hed/models/definition_entry.py        |   5 +-
 hed/models/hed_group.py               |   3 -
 hed/models/hed_string.py              |   2 +
 hed/models/hed_tag.py                 |  13 ++
 hed/validator/def_validator.py        |  24 +++-
 hed/validator/tag_validator.py        |   6 +-
 tests/validator/test_def_validator.py | 177 ++++++++++++++++++++++++++
 tests/validator/test_tag_validator.py |  14 +-
 11 files changed, 266 insertions(+), 35 deletions(-)

diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
index fffc549dd..ca379992f 100644
--- a/hed/errors/error_messages.py
+++ b/hed/errors/error_messages.py
@@ -31,14 +31,14 @@ def val_error_tag_extended(tag, problem_tag):
     return f"Hed tag is extended. '{problem_tag}' in {tag}"
 
 
-@hed_error(ValidationErrors.HED_CHARACTER_INVALID)
+@hed_error(ValidationErrors.CHARACTER_INVALID)
 def val_error_invalid_char(source_string, char_index):
     character = source_string[char_index]
     return f'Invalid character "{character}" at index {char_index}"'
 
 
 @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True,
-               actual_code=ValidationErrors.HED_CHARACTER_INVALID)
+               actual_code=ValidationErrors.CHARACTER_INVALID)
 def val_error_invalid_tag_character(tag, problem_tag):
     return f"Invalid character '{problem_tag}' in {tag}"
 
@@ -49,7 +49,7 @@ def val_error_tildes_not_supported(source_string, char_index):
     return f"Tildes not supported.  Replace (a ~ b ~ c) with (a, (b, c)).  '{character}' at index {char_index}'"
 
 
-@hed_error(ValidationErrors.HED_COMMA_MISSING)
+@hed_error(ValidationErrors.COMMA_MISSING)
 def val_error_comma_missing(tag):
     return f"Comma missing after - '{tag}'"
 
@@ -143,27 +143,44 @@ def val_error_sidecar_key_missing(invalid_key, category_keys):
     return f"Category key '{invalid_key}' does not exist in column.  Valid keys are: {category_keys}"
 
 
-@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED)
-def val_error_def_unmatched(tag):
-    return f"A data-recording’s Def tag cannot be matched to definition.  Tag: '{tag}'"
 
 
-@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID)
+@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID)
 def val_error_bad_def_expand(tag, actual_def, found_def):
     return f"A data-recording’s Def-expand tag does not match the given definition." + \
            f"Tag: '{tag}'.  Actual Def: {actual_def}.  Found Def: {found_def}"
 
 
-@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID)
+@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED, actual_code=ValidationErrors.DEF_INVALID)
+def val_error_def_unmatched(tag):
+    return f"A data-recording’s Def tag cannot be matched to definition.  Tag: '{tag}'"
+
+
+@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.DEF_INVALID)
 def val_error_def_value_missing(tag):
     return f"A def tag requires a placeholder value, but was not given one.  Definition: '{tag}'"
 
 
-@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID)
+@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.DEF_INVALID)
 def val_error_def_value_extra(tag):
     return f"A def tag does not take a placeholder value, but was given one.  Definition: '{tag}"
 
 
+@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_UNMATCHED, actual_code=ValidationErrors.DEF_EXPAND_INVALID)
+def val_error_def_expand_unmatched(tag):
+    return f"A data-recording’s Def-expand tag cannot be matched to definition.  Tag: '{tag}'"
+
+
+@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING, actual_code=ValidationErrors.DEF_EXPAND_INVALID)
+def val_error_def_expand_value_missing(tag):
+    return f"A Def-expand tag requires a placeholder value, but was not given one.  Definition: '{tag}'"
+
+
+@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA, actual_code=ValidationErrors.DEF_EXPAND_INVALID)
+def val_error_def_expand_value_extra(tag):
+    return f"A Def-expand tag does not take a placeholder value, but was given one.  Definition: '{tag}"
+
+
 @hed_tag_error(ValidationErrors.HED_TOP_LEVEL_TAG, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR)
 def val_error_top_level_tag(tag):
     return f"A tag that must be in a top level group was found in another location.  {str(tag)}"
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 66cfaecd1..c4fb5df5f 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -21,11 +21,22 @@ class ErrorContext:
 
 class ValidationErrors:
     # General validation errors
-    HED_CHARACTER_INVALID = 'HED_CHARACTER_INVALID'
-    HED_COMMA_MISSING = 'HED_COMMA_MISSING'
+    CHARACTER_INVALID = 'CHARACTER_INVALID'
+    COMMA_MISSING = 'COMMA_MISSING'
+    DEF_EXPAND_INVALID = "DEF_EXPAND_INVALID"
+    DEF_INVALID = "DEF_INVALID"
+
+    # NOT OFFICIAL
     HED_DEF_UNMATCHED = "HED_DEF_UNMATCHED"
+    HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING"
+    HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA"
+
     HED_DEF_EXPAND_INVALID = "HED_DEF_EXPAND_INVALID"
-    HED_DEF_VALUE_INVALID = "HED_DEF_VALUE_INVALID"
+    HED_DEF_EXPAND_UNMATCHED = "HED_DEF_EXPAND_UNMATCHED"
+    HED_DEF_EXPAND_VALUE_MISSING = "HED_DEF_EXPAND_VALUE_MISSING"
+    HED_DEF_EXPAND_VALUE_EXTRA = "HED_DEF_EXPAND_VALUE_EXTRA"
+    # END NOT OFFICIAL
+
     HED_DEFINITION_INVALID = "HED_DEFINITION_INVALID"
     HED_NODE_NAME_EMPTY = 'HED_NODE_NAME_EMPTY'
     HED_ONSET_OFFSET_ERROR = 'HED_ONSET_OFFSET_ERROR'
@@ -70,8 +81,7 @@ class ValidationErrors:
     HED_MULTIPLE_TOP_TAGS = "HED_MULTIPLE_TOP_TAGS"
     HED_TAG_GROUP_TAG = "HED_TAG_GROUP_TAG"
 
-    HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING"
-    HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA"
+
 
 
 class SidecarErrors:
diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py
index ca3b06b34..04cbfc440 100644
--- a/hed/models/definition_dict.py
+++ b/hed/models/definition_dict.py
@@ -184,7 +184,9 @@ def construct_def_tag(self, hed_tag):
             hed_tag(HedTag): The hed tag to identify definition contents in
         """
         if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}:
+            save_parent = hed_tag._parent
             def_contents = self._get_definition_contents(hed_tag)
+            hed_tag._parent = save_parent
             if def_contents is not None:
                 hed_tag._expandable = def_contents
                 hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY
diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py
index cb7581fa3..27c89d33b 100644
--- a/hed/models/definition_entry.py
+++ b/hed/models/definition_entry.py
@@ -26,13 +26,14 @@ def __init__(self, name, contents, takes_value, source_context):
         if contents:
             add_group_to_dict(contents, self.tag_dict)
 
-    def get_definition(self, replace_tag, placeholder_value=None):
+    def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False):
         """ Return a copy of the definition with the tag expanded and the placeholder plugged in.
 
         Parameters:
             replace_tag (HedTag): The def hed tag to replace with an expanded version
             placeholder_value (str or None):    If present and required, will replace any pound signs
                                                 in the definition contents.
+            return_copy_of_tag(bool): Set to true for validation
 
         Returns:
             str:          The expanded def tag name
@@ -45,6 +46,8 @@ def get_definition(self, replace_tag, placeholder_value=None):
         if self.takes_value == (placeholder_value is None):
             return None, []
 
+        if return_copy_of_tag:
+            replace_tag = replace_tag.copy()
         output_contents = [replace_tag]
         name = self.name
         if self.contents:
diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py
index 6df911801..7273d956c 100644
--- a/hed/models/hed_group.py
+++ b/hed/models/hed_group.py
@@ -132,9 +132,6 @@ def copy(self):
         Returns:
             HedGroup: The copied group.
 
-        Notes:
-            - The parent tag is removed.
-
         """
         save_parent = self._parent
         self._parent = None
diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py
index db16833f1..7be20fb5d 100644
--- a/hed/models/hed_string.py
+++ b/hed/models/hed_string.py
@@ -96,6 +96,7 @@ def shrink_defs(self):
             expanded_parent = def_expand_group._parent
             if expanded_parent:
                 def_expand_tag.short_base_tag = DefTagNames.DEF_ORG_KEY
+                def_expand_tag._parent = expanded_parent
                 expanded_parent.replace(def_expand_group, def_expand_tag)
 
         return self
@@ -118,6 +119,7 @@ def expand_defs(self):
         for tag, group in replacements:
             tag_parent = tag._parent
             tag_parent.replace(tag, group)
+            tag._parent = group
             tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY
 
         return self
diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py
index 29bcf8cf6..689eeac1d 100644
--- a/hed/models/hed_tag.py
+++ b/hed/models/hed_tag.py
@@ -54,6 +54,19 @@ def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None):
             if def_dict:
                 def_dict.construct_def_tag(self)
 
+    def copy(self):
+        """ Return a deep copy of this tag.
+
+        Returns:
+            HedTag: The copied group.
+
+        """
+        save_parent = self._parent
+        self._parent = None
+        return_copy = copy.deepcopy(self)
+        self._parent = save_parent
+        return return_copy
+
     @property
     def schema_prefix(self):
         """ Library prefix for this tag if one exists.
diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py
index 24a3d8e5b..5b18cd466 100644
--- a/hed/validator/def_validator.py
+++ b/hed/validator/def_validator.py
@@ -51,7 +51,7 @@ def _validate_def_contents(self, def_tag, def_expand_group):
             issues
         """
         def_issues = []
-
+        is_def_tag = def_expand_group is not def_tag
         is_label_tag = def_tag.extension_or_value_portion
         placeholder = None
         found_slash = is_label_tag.find("/")
@@ -62,17 +62,27 @@ def _validate_def_contents(self, def_tag, def_expand_group):
         label_tag_lower = is_label_tag.lower()
         def_entry = self.defs.get(label_tag_lower)
         if def_entry is None:
-            def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=def_tag)
+            error_code = ValidationErrors.HED_DEF_UNMATCHED
+            if is_def_tag:
+                error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED
+            def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
         else:
-            def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder)
+            def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder,
+                                                                  return_copy_of_tag=True)
             if def_tag_name:
-                if def_expand_group is not def_tag and def_expand_group != def_contents:
+                if is_def_tag and def_expand_group != def_contents:
                     def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID,
                                                             tag=def_tag, actual_def=def_contents,
                                                             found_def=def_expand_group)
             elif def_entry.takes_value:
-                def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=def_tag)
+                error_code = ValidationErrors.HED_DEF_VALUE_MISSING
+                if is_def_tag:
+                    error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING
+                def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
             else:
-                def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_EXTRA, tag=def_tag)
+                error_code = ValidationErrors.HED_DEF_VALUE_EXTRA
+                if is_def_tag:
+                    error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA
+                def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
 
-        return def_issues
+        return def_issues
\ No newline at end of file
diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py
index 2d08eae62..4ecadfc5a 100644
--- a/hed/validator/tag_validator.py
+++ b/hed/validator/tag_validator.py
@@ -201,13 +201,13 @@ def check_delimiter_issues_in_hed_string(self, hed_string):
                 if current_tag.strip() == self.OPENING_GROUP_CHARACTER:
                     current_tag = ''
                 else:
-                    issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag)
+                    issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag)
             elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER:
                 issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_EMPTY, source_string=hed_string,
                                                     char_index=i)
             elif TagValidator._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character,
                                                                           current_character):
-                issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag[:-1])
+                issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag[:-1])
                 break
             last_non_empty_valid_character = current_character
             last_non_empty_valid_index = i
@@ -495,7 +495,7 @@ def _report_invalid_character_error(self, hed_string, index):
             list: A singleton list with a dictionary representing the error.
 
         """
-        error_type = ValidationErrors.HED_CHARACTER_INVALID
+        error_type = ValidationErrors.CHARACTER_INVALID
         character = hed_string[index]
         if character == "~":
             error_type = ValidationErrors.HED_TILDES_UNSUPPORTED
diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py
index f889b36f1..6bef321a7 100644
--- a/tests/validator/test_def_validator.py
+++ b/tests/validator/test_def_validator.py
@@ -117,3 +117,180 @@ def test_duplicate_def(self):
         self.assertEqual(len(def_dict.issues), 2)
         self.assertTrue('ec_row' in def_dict.issues[0])
 
+
+
+class TestDefErrors(unittest.TestCase):
+    basic_hed_string_with_def_first_paren = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
+        hed_xml_file = os.path.realpath(os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml"))
+        cls.hed_schema = schema.load_schema(hed_xml_file)
+        cls.def_contents_string = "(Item/TestDef1,Item/TestDef2)"
+        cls.basic_definition_string = f"(Definition/TestDef,{cls.def_contents_string})"
+        cls.basic_definition_string_no_paren = f"Definition/TestDef,{cls.def_contents_string}"
+        cls.label_def_string = "Def/TestDef"
+        cls.expanded_def_string = f"(Def-expand/TestDef,{cls.def_contents_string})"
+        cls.basic_hed_string = "Item/BasicTestTag1,Item/BasicTestTag2"
+        cls.basic_hed_string_with_def = f"{cls.basic_hed_string},{cls.label_def_string}"
+        cls.basic_hed_string_with_def_first = f"{cls.label_def_string},{cls.basic_hed_string}"
+        cls.basic_hed_string_with_def_first_paren = f"({cls.label_def_string},{cls.basic_hed_string})"
+        cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471"
+        cls.placeholder_definition_contents = "(Item/TestDef1/#,Item/TestDef2)"
+        cls.placeholder_definition_string = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents})"
+        cls.placeholder_definition_string_no_paren = \
+            f"Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents}"
+        cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2))"
+
+        cls.placeholder_hed_string_with_def = f"{cls.basic_hed_string},{cls.placeholder_label_def_string}"
+        cls.placeholder_hed_string_with_def_first = f"{cls.placeholder_label_def_string},{cls.basic_hed_string}"
+        cls.placeholder_hed_string_with_def_first_paren = f"({cls.placeholder_label_def_string},{cls.basic_hed_string})"
+
+    def base_def_validator(self, test_strings, result_strings, expand_defs, shrink_defs,
+                           remove_definitions, extra_ops=None,
+                           basic_definition_string=None):
+        if not basic_definition_string:
+            basic_definition_string = self.basic_definition_string
+        def_dict = DefValidator(basic_definition_string, hed_schema=self.hed_schema)
+
+
+        for key in test_strings:
+            string, expected_result = test_strings[key], result_strings[key]
+            test_string = HedString(string, self.hed_schema, def_dict)
+            def_issues = def_dict.validate_def_tags(test_string)
+            if remove_definitions:
+                test_string.remove_definitions()
+            if expand_defs:
+                test_string.expand_defs()
+            if shrink_defs:
+                test_string.shrink_defs()
+            self.assertEqual(False, bool(def_issues))
+            self.assertEqual(test_string.get_as_short(), expected_result)
+
+    def test_expand_def_tags(self):
+        basic_def_strings = {
+            'str_no_defs': self.basic_definition_string,
+            'str2': self.basic_definition_string_no_paren,
+            'str3': self.basic_hed_string + "," + self.basic_definition_string,
+            'str4': self.basic_definition_string + "," + self.basic_hed_string,
+            'str5': self.basic_hed_string_with_def,
+            'str6': self.basic_hed_string_with_def_first,
+            'str7': self.basic_hed_string_with_def_first_paren,
+            'str8': "("  + self.basic_hed_string_with_def_first_paren + ")",
+        }
+        expanded_def_strings = {
+            'str_no_defs': "",
+            'str2': self.basic_definition_string_no_paren,
+            'str3': self.basic_hed_string,
+            'str4': self.basic_hed_string,
+            'str5': self.basic_hed_string + "," + self.expanded_def_string,
+            'str6': self.expanded_def_string + "," + self.basic_hed_string,
+            'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")",
+            'str8': "((" + self.expanded_def_string + "," + self.basic_hed_string + "))",
+        }
+        expanded_def_strings_with_definition = {
+            'str_no_defs': self.basic_definition_string,
+            'str2': self.basic_definition_string_no_paren,
+            'str3': self.basic_hed_string + "," + self.basic_definition_string,
+            'str4': self.basic_definition_string + "," + self.basic_hed_string,
+            'str5': self.basic_hed_string + "," + self.expanded_def_string,
+            'str6': self.expanded_def_string + "," + self.basic_hed_string,
+            'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")",
+            'str8': "((" + self.expanded_def_string + "," + self.basic_hed_string + "))",
+        }
+
+        self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition,
+                                expand_defs=True,
+                                shrink_defs=False, remove_definitions=False)
+        self.base_def_validator(basic_def_strings, basic_def_strings, 
+                                expand_defs=False, shrink_defs=False, remove_definitions=False)
+        self.base_def_validator(basic_def_strings, basic_def_strings, 
+                                expand_defs=False, shrink_defs=True, remove_definitions=False)
+        self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings,
+                                expand_defs=False, shrink_defs=True,
+                                remove_definitions=False)
+        self.base_def_validator(expanded_def_strings_with_definition, expanded_def_strings_with_definition,
+                                expand_defs=True, shrink_defs=False,
+                                remove_definitions=False)
+        self.base_def_validator(basic_def_strings, expanded_def_strings, 
+                                expand_defs=True, shrink_defs=False, remove_definitions=True)
+
+    def test_expand_def_tags_placeholder(self):
+        basic_def_strings = {
+            'str_no_defs': self.placeholder_definition_string,
+            'str2': self.placeholder_definition_string_no_paren,
+            'str3': self.basic_hed_string + "," + self.placeholder_definition_string,
+            'str4': self.placeholder_definition_string + "," + self.basic_hed_string,
+            'str5': self.placeholder_hed_string_with_def,
+            'str6': self.placeholder_hed_string_with_def_first,
+            'str7': self.placeholder_hed_string_with_def_first_paren,
+        }
+        expanded_def_strings = {
+            'str_no_defs': "",
+            'str2': self.placeholder_definition_string_no_paren,
+            'str3': self.basic_hed_string,
+            'str4': self.basic_hed_string,
+            'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string,
+            'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string,
+            'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")",
+        }
+        expanded_def_strings_with_definition = {
+            'str_no_defs': self.placeholder_definition_string,
+            'str2': self.placeholder_definition_string_no_paren,
+            'str3': self.basic_hed_string + "," + self.placeholder_definition_string,
+            'str4': self.placeholder_definition_string + "," + self.basic_hed_string,
+            'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string,
+            'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string,
+            'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")",
+        }
+
+        self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, 
+                                expand_defs=True, shrink_defs=False,
+                                remove_definitions=False, basic_definition_string=self.placeholder_definition_string)
+
+        self.base_def_validator(basic_def_strings, basic_def_strings, 
+                                expand_defs=False, shrink_defs=False,
+                                remove_definitions=False, basic_definition_string=self.placeholder_definition_string)
+
+        self.base_def_validator(basic_def_strings, basic_def_strings, 
+                                expand_defs=False, shrink_defs=True,
+                                remove_definitions=False, basic_definition_string=self.placeholder_definition_string)
+
+        self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings, 
+                                expand_defs=False, shrink_defs=True,
+                                remove_definitions=False, basic_definition_string=self.placeholder_definition_string)
+
+        self.base_def_validator(basic_def_strings, expanded_def_strings, 
+                                expand_defs=True, shrink_defs=False,
+                                remove_definitions=True, basic_definition_string=self.placeholder_definition_string)
+
+
+    # todo ian: finish updating these
+    # # special case test
+    # def test_changing_tag_then_def_mapping(self):
+    #     def_dict = DefinitionDict()
+    #     def_string = HedString(self.basic_definition_string)
+    #     def_string.convert_to_canonical_forms(None)
+    #     def_dict.check_for_definitions(def_string)
+    #     def_mapper = DefMapper(def_dict)
+    #     validator = HedValidator(self.hed_schema)
+    #     hed_ops = [validator, def_mapper]
+    #
+    #     test_string = HedString(self.label_def_string)
+    #     tag = test_string.children[0]
+    #     tag.tag = "Organizational-property/" + str(tag)
+    #     def_issues = test_string.validate(hed_ops, expand_defs=True)
+    #     self.assertFalse(def_issues)
+    #     self.assertEqual(test_string.get_as_short(), f"{self.expanded_def_string}")
+    #
+    #     test_string = HedString(self.label_def_string)
+    #     tag = test_string.children[0]
+    #     tag.tag = "Organizational-property22/" + str(tag)
+    #     def_issues = test_string.validate(hed_ops, expand_defs=True)
+    #     self.assertTrue(def_issues)
+
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py
index dc0fb910a..939423638 100644
--- a/tests/validator/test_tag_validator.py
+++ b/tests/validator/test_tag_validator.py
@@ -621,9 +621,9 @@ def test_malformed_delimiters(self):
             # 'emptyGroup': False
         }
         expected_issues = {
-            'missingOpeningComma': self.format_error(ValidationErrors.HED_COMMA_MISSING,
+            'missingOpeningComma': self.format_error(ValidationErrors.COMMA_MISSING,
                                                      tag="Action/Reach/To touch("),
-            'missingClosingComma': self.format_error(ValidationErrors.HED_COMMA_MISSING,
+            'missingClosingComma': self.format_error(ValidationErrors.COMMA_MISSING,
                                                      tag="Participant/Effect/Body part/Arm)"),
             'extraOpeningComma': self.format_error(ValidationErrors.HED_TAG_EMPTY,
                                                    source_string=test_strings['extraOpeningComma'],
@@ -674,7 +674,7 @@ def test_malformed_delimiters(self):
             'validNestedParentheses2': [],
             'validNestedParentheses3': [],
             'validNestedParentheses4': [],
-            'invalidNestedParentheses': self.format_error(ValidationErrors.HED_COMMA_MISSING,
+            'invalidNestedParentheses': self.format_error(ValidationErrors.COMMA_MISSING,
                                                           tag="Thing)) "),
             # 'emptyGroup': []
         }
@@ -698,13 +698,13 @@ def test_invalid_characters(self):
             'closingBracket': False
         }
         expected_issues = {
-            'openingBrace': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45,
+            'openingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45,
                                               source_string=test_strings['openingBrace']),
-            'closingBrace': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45,
+            'closingBrace': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45,
                                               source_string=test_strings['closingBrace']),
-            'openingBracket': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45,
+            'openingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45,
                                                 source_string=test_strings['openingBracket']),
-            'closingBracket': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45,
+            'closingBracket': self.format_error(ValidationErrors.CHARACTER_INVALID, char_index=45,
                                                 source_string=test_strings['closingBracket'])
         }
         self.validator_semantic(test_strings, expected_results, expected_issues, False)

From 77ea23c6f23266529a71131db1eb453e89af33ea Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 23 Mar 2023 18:05:31 -0500
Subject: [PATCH 3/3] Block HED from appearing in sidecars

---
 hed/errors/error_messages.py       |  7 ++++++-
 hed/errors/error_types.py          |  3 ++-
 hed/validator/sidecar_validator.py | 23 ++++++++++++++++++++++-
 spec_tests/test_errors.py          | 24 ++++++++++++++++++++----
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
index ca379992f..7fd609a64 100644
--- a/hed/errors/error_messages.py
+++ b/hed/errors/error_messages.py
@@ -277,7 +277,12 @@ def sidecar_error_unknown_column(column_name):
 
 
 @hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=SidecarErrors.SIDECAR_INVALID)
-def sidecar_hed_used():
+def SIDECAR_HED_USED():
+    return "'HED' is a reserved name and cannot be used as a sidecar except in expected places."
+
+
+@hed_error(SidecarErrors.SIDECAR_HED_USED_COLUMN, actual_code=SidecarErrors.SIDECAR_INVALID)
+def SIDECAR_HED_USED_COLUMN():
     return "'HED' is a reserved name and cannot be used as a sidecar column name"
 
 
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index c4fb5df5f..272bfe299 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -92,8 +92,9 @@ class SidecarErrors:
     INVALID_POUND_SIGNS_VALUE = 'invalidNumberPoundSigns'
     INVALID_POUND_SIGNS_CATEGORY = 'tooManyPoundSigns'
     UNKNOWN_COLUMN_TYPE = 'sidecarUnknownColumn'
-    SIDECAR_HED_USED = 'SIDECAR_HED_USED'
+    SIDECAR_HED_USED_COLUMN = 'SIDECAR_HED_USED_COLUMN'
     SIDECAR_NA_USED = 'SIDECAR_NA_USED'
+    SIDECAR_HED_USED = 'SIDECAR_HED_USED'
 
 class SchemaErrors:
     HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE'
diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py
index daa71fb07..8c68808e8 100644
--- a/hed/validator/sidecar_validator.py
+++ b/hed/validator/sidecar_validator.py
@@ -81,6 +81,23 @@ def validate_structure(self, sidecar, error_handler):
             error_handler.pop_error_context()
         return all_validation_issues
 
+    @staticmethod
+    def _check_for_key(key, data):
+        if isinstance(data, dict):
+            if key in data:
+                return bool(data[key])
+            else:
+                for sub_data in data.values():
+                    result = SidecarValidator._check_for_key(key, sub_data)
+                    if result is not None:
+                        return result
+        elif isinstance(data, list):
+            for sub_data in data:
+                result = SidecarValidator._check_for_key(key, sub_data)
+                if result is not None:
+                    return result
+        return None
+
     def _validate_column_structure(self, column_name, dict_for_entry, error_handler):
         """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar.
 
@@ -93,13 +110,17 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
         """
         val_issues = []
         if column_name in self.reserved_column_names:
-            val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
+            val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN)
             return val_issues
 
         column_type = Sidecar._detect_column_type(dict_for_entry=dict_for_entry)
         if column_type is None:
             val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
                                                                   column_name=column_name)
+        elif column_type == ColumnType.Ignore:
+            found_hed = self._check_for_key("HED", dict_for_entry)
+            if found_hed:
+                val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED)
         elif column_type == ColumnType.Categorical:
             raw_hed_dict = dict_for_entry["HED"]
             if not raw_hed_dict:
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index 9c80d4d98..81942a915 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -11,6 +11,13 @@
 from hed.errors import ErrorHandler, get_printable_issue_string
 
 
+known_errors = [
+    'SIDECAR_INVALID',
+    'CHARACTER_INVALID',
+    'COMMA_MISSING',
+    "DEF_EXPAND_INVALID",
+    "DEF_INVALID",
+]
 
 skip_tests = ["VERSION_DEPRECATED", "CHARACTER_INVALID", "STYLE_WARNING"]
 
@@ -30,6 +37,12 @@ def run_single_test(self, test_file):
             test_info = json.load(fp)
         for info in test_info:
             error_code = info['error_code']
+            verify_code = False
+            if error_code in known_errors:
+                verify_code = True
+
+            # To be deprecated once we add this to all tests
+            self._verify_code = verify_code
             if error_code in skip_tests:
                 print(f"Skipping {error_code} test")
                 continue
@@ -62,6 +75,13 @@ def report_result(self, expected_result, issues, error_code, description, name,
                 print(f"Passed '{test_type}' (which should fail) '{name}': {test}")
                 print(get_printable_issue_string(issues))
                 self.fail_count.append(name)
+            elif self._verify_code:
+                if any(issue['code'] == error_code for issue in issues):
+                    return
+                print(f"{error_code}: {description}")
+                print(f"Failed '{test_type}' (unexpected errors found) '{name}': {test}")
+                print(get_printable_issue_string(issues))
+                self.fail_count.append(name)
         else:
             if issues:
                 print(f"{error_code}: {description}")
@@ -75,9 +95,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio
             for test in tests:
                 test_string = HedString(test, schema)
 
-                # This expand should not be required here.
-                def_dict.expand_def_tags(test_string)
-
                 issues = string_validator.run_basic_checks(test_string, False)
                 issues += string_validator.run_full_string_checks(test_string)
                 error_handler.add_context_and_filter(issues)
@@ -86,7 +103,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio
     def _run_single_sidecar_test(self, info, schema, def_dict, error_code, description, name, error_handler):
         for result, tests in info.items():
             for test in tests:
-                # Well this is a disaster
                 buffer = io.BytesIO(json.dumps(test).encode("utf-8"))
                 sidecar = Sidecar(buffer)
                 issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler)