Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CLI option to include options between full diff output and LLM summary #28

Open
wants to merge 2 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Running
- `git submodule update --init --recursive`
- `export COMPOSE_PROFILES=neo4jDev` or `export COMPOSE_PROFILES=neo4jTest` to select the Dev or Test environment, respectively
- `docker compose up` should get the containers built and spinning.
- For diff results, access to openAI's API key is required. Set `export OPENAI_API_KEY="your_openai_api_key"`, `export ORGANIZATION_KEY="your_organization_key"` and `export PROJECT_KEY="your_project_key"` using your personal access keys.
- By default, diff results are shown as the full diff output. A comprehensive summary of the diff can be requested instead (e.g. with `--diff summary`). The summary requires access to the OpenAI API: set `export OPENAI_API_KEY="your_openai_api_key"`, `export ORGANIZATION_KEY="your_organization_key"` and `export PROJECT_KEY="your_project_key"` using your personal access keys.

# Run your script
python your_script.py
Expand Down Expand Up @@ -66,14 +66,14 @@ FHIR Data Comparisons

For every chain, the FHIR data moving between servers can be compared to check data integrity and sanity. The comparison can be run from `tools/`.
Use the following commands:
- `python3 diff.py --guid guid_sequence --type <xml or json> --all-depths` to compare the paths taken by the guid for all hops.
- `python3 diff.py --guid guid_sequence --type <xml or json> --depth 1` to make the comparisons for paths with a single hop.
- `python3 diff.py --guid guid_sequence --type <xml or json> --all-depths` to compare the paths taken by the guid for all hops and to get the full diff result.
- `python3 diff.py --guid guid_sequence --type <xml or json> --depth 1 --diff summary` to make the comparisons for paths with a single hop and to get the summary of the diff result.

The results will show the differences (if they exist) between the input and output FHIR data through the nodes in a path.

To run the entire Game of Telephone process together with its corresponding data comparisons, run the following:
- `python3 run_scripts.py --generate --all-chains --chain-length 2`
- `python3 run_scripts.py --file <file_name> --type xml -c hapi -c blaze`
- `python3 run_scripts.py --file <file_name> --type xml -c hapi -c blaze --diff summary`



Expand Down
16 changes: 16 additions & 0 deletions tools/cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ def add_diff_options(func):
default=None,
help="Patient file type - json or xml",
)
@click.option(
"--diff",
"diff_type",
type=str,
required=True,
default="full",
help="Diff output type - summary or full",
)
@optgroup.group(
"Either choose depth = 1 or choose all depths.",
cls=RequiredMutuallyExclusiveOptionGroup,
Expand All @@ -46,6 +54,14 @@ def add_chain_options(func):
default=None,
help="Patient file type - json or xml",
)
@click.option(
"--diff",
"diff_type",
type=str,
required=False,
default="full",
help="Diff output type - summary or full",
)
@optgroup.group(
"Either generate a file or provide a command-line argument",
cls=RequiredMutuallyExclusiveOptionGroup,
Expand Down
91 changes: 55 additions & 36 deletions tools/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,22 @@ def clean_string_from_file(file):
return file


def compare_function(file1, file2, file_type):
def compare_function(file1, file2, file_type, diff_type):
"""Compare two objects and return their differences using DeepDiff"""
if file_type.lower() == "xml":
file1 = xmltodict.parse(clean_string_from_file(file1))
file2 = xmltodict.parse(clean_string_from_file(file2))
diff = DeepDiff(file1, file2, ignore_order=False)
if diff:
gpt_diff_result = json.loads(gpt_diff_output(diff))
return False, gpt_diff_result

return True, f"{file_type} FHIR data is identical."
if not diff:
return True, f"{file_type} FHIR data is identical."
if diff_type == "summary":
return False, json.loads(gpt_diff_output(diff))
else:
return False, str(diff)


def wrap_text(text, width):
    """Wrap ``text`` so that no line is longer than ``width`` characters.

    The text is split into lines with ``textwrap.wrap`` and the resulting
    lines are joined with newline characters.

    Args:
        text: The string to wrap.
        width: Maximum length of each wrapped line.

    Returns:
        The wrapped text as a single newline-separated string. When
        ``textwrap.wrap`` yields no lines (e.g. for an empty string), the
        result is an empty string.
    """
    return "\n".join(textwrap.wrap(text, width))


Expand Down Expand Up @@ -116,7 +118,7 @@ def check_xml(file):
return clean_file.strip().startswith("<")


def compare_paths(paths, chains, file_type):
def compare_paths(paths, chains, file_type, diff_type):
"""Create struct for all segments of a path and internally compare those segments."""
edge_list = []
for path in paths:
Expand Down Expand Up @@ -200,19 +202,21 @@ def compare_paths(paths, chains, file_type):
file1
) # Need to load json twice as the data contains escaped spaces in string format
except json.JSONDecodeError as e:
# print("Chain created, but input JSON is invalid:", e)
print(
"Chain created, but input JSON is invalid:", e
)
file1 = None
"""Here, we say that the input file to a server is invalid, but then how did the server import it?
We skip the compare path function and directly print an invalid message to the table.
"""
pass
# Here, we say that the input file to a server is invalid, but then how did the server import it?
# We skip the compare path function and directly print an invalid message to the table.

else:
file1 = clean_string_from_file(file1)
file2 = clean_string_from_file(file2)

if file1 is not None:
match, result = compare_function(file1, file2, file_type)
match, result = compare_function(
file1, file2, file_type, diff_type
)

else:
match = False
Expand All @@ -221,28 +225,38 @@ def compare_paths(paths, chains, file_type):
)

chain_links = f"{links[current_link_number][0]} -> {links[current_link_number][1]} and {links[next_link_number][0]} -> {links[next_link_number][1]}"
if match is False:
severity = result["Category"]
summary = result["Summary"]
else:
severity = "N/A"
summary = result

# Wrap text for columns
wrapped_guid = wrap_text(guid, 40)
wrapped_chain_links = wrap_text(chain_links, 40)
wrapped_severity = wrap_text(severity, 20)
wrapped_diff = wrap_text(summary, 60)

table_data.append(
[
wrapped_guid,
wrapped_chain_links,
wrapped_severity,
wrapped_diff,
]
)
table_data.append(["" * 40, "-" * 40, "-" * 20, "-" * 60])

if diff_type == "summary":
severity = result["Category"] if not match else "N/A"
summary = result["Summary"] if not match else result

wrapped_severity = wrap_text(severity, 20)
wrapped_diff = wrap_text(summary, 60)

table_data.append(
[
wrapped_guid,
wrapped_chain_links,
wrapped_severity,
wrapped_diff,
]
)
table_data.append(["" * 40, "-" * 40, "-" * 20, "-" * 60])

else:
wrapped_diff = wrap_text(result, 60)
table_data.append(
[
wrapped_guid,
wrapped_chain_links,
wrapped_diff,
]
)
table_data.append(["" * 40, "-" * 40, "-" * 60])

if table_data:
# Remove the last separator row
Expand All @@ -256,10 +270,15 @@ def compare_paths(paths, chains, file_type):
else:
current_guid = row[0]

headers = (
["GUID", "Chain Links", "Severity", "Diff"]
if diff_type == "summary"
else ["GUID", "Chain Links", "Diff"]
)
print(
tabulate(
table_data,
headers=["GUID", "Chain Links", "Severity", "Diff"],
headers=headers,
tablefmt="pretty",
)
)
Expand All @@ -272,11 +291,11 @@ def compare_paths(paths, chains, file_type):

@click.command()
@add_diff_options
def diff_cli_options(guid, depth, all_depths, file_type):
db_query(guid, depth, all_depths, file_type)
def diff_cli_options(guid, depth, all_depths, file_type, diff_type):
db_query(guid, depth, all_depths, file_type, diff_type)


def db_query(guid, depth, all_depths, file_type):
def db_query(guid, depth, all_depths, file_type, diff_type):
"""Command line options to run comparisons"""

if guid:
Expand Down Expand Up @@ -305,7 +324,7 @@ def db_query(guid, depth, all_depths, file_type):
print("Please specify a GUID option.")
return
paths = run_query(query, params)
compare_paths(paths, chains, file_type)
compare_paths(paths, chains, file_type, diff_type)


if __name__ == "__main__":
Expand Down
21 changes: 11 additions & 10 deletions tools/run_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def validate_file_type(file_type, file):

@click.command()
@add_chain_options
def main(chain_length, file, generate, chain, all_chains, file_type):
def main(chain_length, file, generate, chain, all_chains, file_type, diff_type):
"""Construct cli command and sequentially run telephone.py and diff.py"""
# Validate --generate and --file arguments
if generate and file:
Expand All @@ -56,17 +56,18 @@ def main(chain_length, file, generate, chain, all_chains, file_type):
validate_file_type(file_type, file)
validate_options(file_type, chain, all_chains)

# Validate openAI keys
if not (
"OPENAI_API_KEY" in os.environ
and "ORGANIZATION_KEY" in os.environ
and "PROJECT_KEY" in os.environ
):
raise click.UsageError("OpenAI API keys not set.")
if diff_type == "summary":
# Validate openAI keys
if not (
"OPENAI_API_KEY" in os.environ
and "ORGANIZATION_KEY" in os.environ
and "PROJECT_KEY" in os.environ
):
raise click.UsageError("OpenAI API keys not set.")

# Run telephone.py with either --generate or --file
guid = telephone_function(
chain_length, file, generate, chain, all_chains, file_type
chain_length, file, generate, chain, all_chains, file_type, diff_type
)
all_depths = False
depth = 0
Expand All @@ -76,7 +77,7 @@ def main(chain_length, file, generate, chain, all_chains, file_type):
if all_chains:
# Return results of depth = 1 only
depth = 1
db_query(guid, depth, all_depths, file_type)
db_query(guid, depth, all_depths, file_type, diff_type)


if __name__ == "__main__":
Expand Down
10 changes: 7 additions & 3 deletions tools/telephone.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,15 @@ def process_step(

@click.command()
@add_chain_options
def cli_options(chain_length, file, generate, chain, all_chains, file_type):
telephone_function(chain_length, file, generate, chain, all_chains, file_type)
def cli_options(chain_length, file, generate, chain, all_chains, file_type, diff_type):
telephone_function(
chain_length, file, generate, chain, all_chains, file_type, diff_type
)


def telephone_function(chain_length, file, generate, chain, all_chains, file_type):
def telephone_function(
chain_length, file, generate, chain, all_chains, file_type, diff_type
):
"""Command line options for the telephone.py script
Vista takes a different format (Bundle Resource) as input, whereas others require a patient
"""
Expand Down