narfindustries · see-k-er · Oct 7, 2024 · Oct 7, 2024
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ Running
 - `git submodule update --init --recursive`
 - `export COMPOSE_PROFILES=neo4jDev` or `export COMPOSE_PROFILES=neo4jTest` to set between Dev or Test Environment
 - `docker compose up` should get the containers built and spinning.
--  For diff results, access to openAI's API key is required. Set `export OPENAI_API_KEY="your_openai_api_key"`, `export ORGANIZATION_KEY="your_organization_key"` and `export PROJECT_KEY="your_project_key"` using your personal access keys.
+-  By default, diff results are reported in full. A comprehensive summary of the diff result is also available when selected with `--diff summary`; generating the summary requires access to the OpenAI API. Set `export OPENAI_API_KEY="your_openai_api_key"`, `export ORGANIZATION_KEY="your_organization_key"` and `export PROJECT_KEY="your_project_key"` using your personal access keys.
 
 # Run your script
 python your_script.py
@@ -66,14 +66,14 @@ FHIR Data Comparisons
 
 For every chain, the FHIR data moving between servers can be compared data integrity and sanity. The comparison can be run from `tools/`.
 Use the following commands:
-- `python3 diff.py --guid guid_sequence --type <xml or json> --all-depths` to compare the paths taken by the guid for all hops.
-- `python3 diff.py --guid guid_sequence --type <xml or json> --depth 1` to make the comparisons for paths with a single hop. 
+- `python3 diff.py --guid guid_sequence --type <xml or json> --all-depths` to compare the paths taken by the GUID across all hops and get the full diff result (the default).
+- `python3 diff.py --guid guid_sequence --type <xml or json> --depth 1 --diff summary` to compare only the single-hop paths and get a summary of the diff result.
 
 The results will show the differences (if they exist) between the input and output FHIR data through the nodes in a path.
 
 To run the entire process of Game of Telephone and their corresponding Data Comparisons, run the following:
 - `python3 run_scripts.py --generate --all-chains --chain-length 2`
-- `python3 run_scripts.py --file <file_name> --type xml -c hapi -c blaze`
+- `python3 run_scripts.py --file <file_name> --type xml -c hapi -c blaze --diff summary`
 
 
 

diff --git a/tools/cli_options.py b/tools/cli_options.py
@@ -22,6 +22,14 @@ def add_diff_options(func):
         default=None,
         help="Patient file type - json or xml",
     )
+    @click.option(
+        "--diff",
+        "diff_type",
+        type=str,
+        required=True,
+        default="full",
+        help="Diff output type - summary or full",
+    )
     @optgroup.group(
         "Either choose depth = 1 or choose all depths.",
         cls=RequiredMutuallyExclusiveOptionGroup,
@@ -46,6 +54,14 @@ def add_chain_options(func):
         default=None,
         help="Patient file type - json or xml",
     )
+    @click.option(
+        "--diff",
+        "diff_type",
+        type=str,
+        required=False,
+        default="full",
+        help="Diff output type - summary or full",
+    )
     @optgroup.group(
         "Either generate a file or provide a command-line argument",
         cls=RequiredMutuallyExclusiveOptionGroup,

diff --git a/tools/diff.py b/tools/diff.py
@@ -53,20 +53,22 @@ def clean_string_from_file(file):
     return file
 
 
-def compare_function(file1, file2, file_type):
+def compare_function(file1, file2, file_type, diff_type):
     """Compare two objects and return their differences using DeepDiff"""
     if file_type.lower() == "xml":
         file1 = xmltodict.parse(clean_string_from_file(file1))
         file2 = xmltodict.parse(clean_string_from_file(file2))
     diff = DeepDiff(file1, file2, ignore_order=False)
-    if diff:
-        gpt_diff_result = json.loads(gpt_diff_output(diff))
-        return False, gpt_diff_result
-
-    return True, f"{file_type} FHIR data is identical."
+    if not diff:
+        return True, f"{file_type} FHIR data is identical."
+    if diff_type == "summary":
+        return False, json.loads(gpt_diff_output(diff))
+    else:
+        return False, str(diff)
 
 
 def wrap_text(text, width):
+    """Wraps Text"""
     return "\n".join(textwrap.wrap(text, width))
 
 
@@ -116,7 +118,7 @@ def check_xml(file):
     return clean_file.strip().startswith("<")
 
 
-def compare_paths(paths, chains, file_type):
+def compare_paths(paths, chains, file_type, diff_type):
     """Create struct for all segments of a path and internally compare those segments."""
     edge_list = []
     for path in paths:
@@ -200,19 +202,21 @@ def compare_paths(paths, chains, file_type):
                                         file1
                                     )  # Need to load json twice as the data contains escaped spaces in string format
                                 except json.JSONDecodeError as e:
-                                    # print("Chain created, but input JSON is invalid:", e)
+                                    print(
+                                        "Chain created, but input JSON is invalid:", e
+                                    )
                                     file1 = None
-                                    """Here, we say that the input file to a server is invalid, but then how did the server import it?
-                                    We skip the compare path function and directly print an invalid message to the table.
-                                    """
-                                    pass
+                                    # The input file sent to the server is invalid JSON, which raises the question of how the server imported it.
+                                    # In this case we skip the compare function and print an invalid-input message directly in the table.
 
                             else:
                                 file1 = clean_string_from_file(file1)
                                 file2 = clean_string_from_file(file2)
 
                         if file1 is not None:
-                            match, result = compare_function(file1, file2, file_type)
+                            match, result = compare_function(
+                                file1, file2, file_type, diff_type
+                            )
 
                         else:
                             match = False
@@ -221,28 +225,38 @@ def compare_paths(paths, chains, file_type):
                             )
 
                         chain_links = f"{links[current_link_number][0]} -> {links[current_link_number][1]} and {links[next_link_number][0]} -> {links[next_link_number][1]}"
-                        if match is False:
-                            severity = result["Category"]
-                            summary = result["Summary"]
-                        else:
-                            severity = "N/A"
-                            summary = result
 
                         # Wrap text for columns
                         wrapped_guid = wrap_text(guid, 40)
                         wrapped_chain_links = wrap_text(chain_links, 40)
-                        wrapped_severity = wrap_text(severity, 20)
-                        wrapped_diff = wrap_text(summary, 60)
-
-                        table_data.append(
-                            [
-                                wrapped_guid,
-                                wrapped_chain_links,
-                                wrapped_severity,
-                                wrapped_diff,
-                            ]
-                        )
-                        table_data.append(["" * 40, "-" * 40, "-" * 20, "-" * 60])
+
+                        if diff_type == "summary":
+                            severity = result["Category"] if not match else "N/A"
+                            summary = result["Summary"] if not match else result
+
+                            wrapped_severity = wrap_text(severity, 20)
+                            wrapped_diff = wrap_text(summary, 60)
+
+                            table_data.append(
+                                [
+                                    wrapped_guid,
+                                    wrapped_chain_links,
+                                    wrapped_severity,
+                                    wrapped_diff,
+                                ]
+                            )
+                            table_data.append(["" * 40, "-" * 40, "-" * 20, "-" * 60])
+
+                        else:
+                            wrapped_diff = wrap_text(result, 60)
+                            table_data.append(
+                                [
+                                    wrapped_guid,
+                                    wrapped_chain_links,
+                                    wrapped_diff,
+                                ]
+                            )
+                            table_data.append(["" * 40, "-" * 40, "-" * 60])
 
                 if table_data:
                     # Remove the last separator row
@@ -256,10 +270,15 @@ def compare_paths(paths, chains, file_type):
                         else:
                             current_guid = row[0]
 
+                    headers = (
+                        ["GUID", "Chain Links", "Severity", "Diff"]
+                        if diff_type == "summary"
+                        else ["GUID", "Chain Links", "Diff"]
+                    )
                     print(
                         tabulate(
                             table_data,
-                            headers=["GUID", "Chain Links", "Severity", "Diff"],
+                            headers=headers,
                             tablefmt="pretty",
                         )
                     )
@@ -272,11 +291,11 @@ def compare_paths(paths, chains, file_type):
 
 @click.command()
 @add_diff_options
-def diff_cli_options(guid, depth, all_depths, file_type):
-    db_query(guid, depth, all_depths, file_type)
+def diff_cli_options(guid, depth, all_depths, file_type, diff_type):
+    db_query(guid, depth, all_depths, file_type, diff_type)
 
 
-def db_query(guid, depth, all_depths, file_type):
+def db_query(guid, depth, all_depths, file_type, diff_type):
     """Command line options to run comparisons"""
 
     if guid:
@@ -305,7 +324,7 @@ def db_query(guid, depth, all_depths, file_type):
         print("Please specify a GUID option.")
         return
     paths = run_query(query, params)
-    compare_paths(paths, chains, file_type)
+    compare_paths(paths, chains, file_type, diff_type)
 
 
 if __name__ == "__main__":

diff --git a/tools/run_scripts.py b/tools/run_scripts.py
@@ -37,7 +37,7 @@ def validate_file_type(file_type, file):
 
 @click.command()
 @add_chain_options
-def main(chain_length, file, generate, chain, all_chains, file_type):
+def main(chain_length, file, generate, chain, all_chains, file_type, diff_type):
     """Construct cli command and sequentially run telephone.py and diff.py"""
     # Validate --generate and --file arguments
     if generate and file:
@@ -56,17 +56,18 @@ def main(chain_length, file, generate, chain, all_chains, file_type):
         validate_file_type(file_type, file)
     validate_options(file_type, chain, all_chains)
 
-    # Validate openAI keys
-    if not (
-        "OPENAI_API_KEY" in os.environ
-        and "ORGANIZATION_KEY" in os.environ
-        and "PROJECT_KEY" in os.environ
-    ):
-        raise click.UsageError("OpenAI API keys not set.")
+    if diff_type == "summary":
+        # Validate openAI keys
+        if not (
+            "OPENAI_API_KEY" in os.environ
+            and "ORGANIZATION_KEY" in os.environ
+            and "PROJECT_KEY" in os.environ
+        ):
+            raise click.UsageError("OpenAI API keys not set.")
 
     # Run telephone.py with either --generate or --file
     guid = telephone_function(
-        chain_length, file, generate, chain, all_chains, file_type
+        chain_length, file, generate, chain, all_chains, file_type, diff_type
     )
     all_depths = False
     depth = 0
@@ -76,7 +77,7 @@ def main(chain_length, file, generate, chain, all_chains, file_type):
     if all_chains:
         # Return results of depth = 1 only
         depth = 1
-    db_query(guid, depth, all_depths, file_type)
+    db_query(guid, depth, all_depths, file_type, diff_type)
 
 
 if __name__ == "__main__":

diff --git a/tools/telephone.py b/tools/telephone.py
@@ -216,11 +216,15 @@ def process_step(
 
 @click.command()
 @add_chain_options
-def cli_options(chain_length, file, generate, chain, all_chains, file_type):
-    telephone_function(chain_length, file, generate, chain, all_chains, file_type)
+def cli_options(chain_length, file, generate, chain, all_chains, file_type, diff_type):
+    telephone_function(
+        chain_length, file, generate, chain, all_chains, file_type, diff_type
+    )
 
 
-def telephone_function(chain_length, file, generate, chain, all_chains, file_type):
+def telephone_function(
+    chain_length, file, generate, chain, all_chains, file_type, diff_type
+):
     """Command line options for the telephone.py script
     Vista takes a different format (Bundle Resource) as input, whereas others require a patient
     """