Skip to content

Commit

Permalink
Fixed: when file pattern is mismatched, version could become None.
Browse files Browse the repository at this point in the history
  • Loading branch information
tcpan committed Jan 24, 2025
1 parent e8c90ae commit 9231e8f
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 32 deletions.
41 changes: 21 additions & 20 deletions chorus_upload/config.toml.template
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ azure_sas_token = "sastoken"
path = "/mnt/data/site"
# path = "/mnt/data/site/pitts"

# if true, OMOP is a subdirectory of patient directory: "{patient_id}/OMOP/*.csv"
# if true, OMOP is a subdirectory of patient directory: "{patient_id:w}/OMOP/*.csv"
# if false, OMOP is a sibling directory of patients: "OMOP/*.csv"
# this will influence the central repo's content
# default is false
Expand All @@ -80,12 +80,13 @@ azure_sas_token = "sastoken"
# indicates if the relative path is in a dated directory
versioned = false

# default pattern for omop files.
# default pattern for omop files.
# use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w}
pattern = "OMOP/{filepath}"
# pattern = "{patient_id:d}/OMOP_tables/{filepath}"
# pattern = "{patient_id:w}/OMOP_tables/{filepath}"
# if omop_per_patient == true
# pattern = "{patient_id:d}/OMOP/{filepath}"
# pattern = "{version:d}/{patient_id:d}/OMOP_tables/{filepath}"
# pattern = "{patient_id:w}/OMOP/{filepath}"
# pattern = "{version:w}/{patient_id:w}/OMOP_tables/{filepath}"


[site_path.Images]
Expand All @@ -102,11 +103,11 @@ azure_sas_token = "sastoken"
# indicates if the relative path is in a dated directory
versioned = false

# pattern for the files. defaults to {patient_id:d}/Images/{filepath}
# :d indicates that only numeric ids are supported.
pattern = "{patient_id:d}/Images/{filepath}"
# pattern = "Person{patient_id:d}/Images/{filepath}"
# pattern = "{version:d}/{patient_id:d}/Images/{filepath}"
# pattern for the files. defaults to {patient_id:w}/Images/{filepath}
# use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w}
pattern = "{patient_id:w}/Images/{filepath}"
# pattern = "Person{patient_id:w}/Images/{filepath}"
# pattern = "{version:w}/{patient_id:w}/Images/{filepath}"

[site_path.Waveforms]
# this is the path of the source directory for waveforms
Expand All @@ -115,11 +116,11 @@ azure_sas_token = "sastoken"
# indicates if the relative path is in a dated directory
versioned = false

# pattern for the files. defaults to {patient_id:d}/Waveforms/{filepath}
# :d indicates that only numeric ids are supported.
pattern = "{patient_id:d}/Waveforms/{filepath}"
# pattern = "Person{patient_id:d}/Waveforms/{filepath}"
# pattern = "{version:d}/{patient_id:d}/Waveforms/{filepath}"
# pattern for the files. defaults to {patient_id:w}/Waveforms/{filepath}
# use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w}
pattern = "{patient_id:w}/Waveforms/{filepath}"
# pattern = "Person{patient_id:w}/Waveforms/{filepath}"
# pattern = "{version:w}/{patient_id:w}/Waveforms/{filepath}"


[site_path.Metadata]
Expand All @@ -129,8 +130,8 @@ azure_sas_token = "sastoken"
# indicates if the relative path is in a dated directory
versioned = false

# pattern for the files. defaults to {patient_id:d}/Waveforms/{filepath}
# :d indicates that only numeric ids are supported.
pattern = "{patient_id:d}/Waveforms/{filepath}"
# pattern = "Person{patient_id:d}/Waveforms/{filepath}"
# pattern = "{version:d}/{patient_id:d}/Waveforms/{filepath}"
# pattern for the files. defaults to {patient_id:w}/Waveforms/{filepath}
# use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w}
pattern = "{patient_id:w}/Waveforms/{filepath}"
# pattern = "Person{patient_id:w}/Waveforms/{filepath}"
# pattern = "{version:w}/{patient_id:w}/Waveforms/{filepath}"
34 changes: 22 additions & 12 deletions chorus_upload/local_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,15 @@ def _gen_journal(root : FileSystemHelper, modalities: list[str],
myargs = future.result()
perf.add_file(myargs[4])
rlpath = myargs[1]
status = myargs[7]
if verbose and status == "ADDED":
print("INFO: ADDED ", rlpath)
else:
print(".", end="", flush=True)

all_args.append(myargs)
status = myargs[8]
if status in ["ADDED", "MOVED", "UPDATED"]:
if verbose:
print("INFO: ADDED ", rlpath)
else:
print(".", end="", flush=True)
all_args.append(myargs)
elif status == "ERROR4":
print("INFO: File does not fit pattern.", rlpath)

insert_count = JournalTable.insert_journal_entries(databasename, all_args)

Expand All @@ -157,9 +159,14 @@ def _update_journal_one_file(root: FileSystemHelper, relpath:str, modality:str,

# first item is personid.
parsed = compiled_pattern.parse(relpath)
personid = parsed.named.get("patient_id", None)
version = version if version is not None else parsed.named.get("version", None)

if parsed is not None:
personid = parsed.named.get("patient_id", None)
version = version if version is not None else parsed.named.get("version", None)
# print("DEBUG: Parsed ", relpath, " person id ", personid, "version", version)
else:
# print("Info: pattern not matched. skipping ", relpath)
return (None, relpath, modality, None, 0, None, curtimestamp, None, "ERROR4", None, None)

# matched = PERSONID_REGEX.match(relpath)
# personid = matched.group(1) if matched else None

Expand All @@ -170,10 +177,10 @@ def _update_journal_one_file(root: FileSystemHelper, relpath:str, modality:str,
# There should be only 1 active file for a given path in a well-formed journal.
if (len(results) > 1):
# print("ERROR: Multiple active files with that path - journal is not consistent")
return (personid, relpath, modality, None, None, None, curtimestamp, None, "ERROR1", None, None)
return (personid, relpath, modality, None, 0, None, curtimestamp, None, "ERROR1", None, None)
if (len(results) == 0):
# print("ERROR: File found but no metadata.", relpath)
return (personid, relpath, modality, None, None, None, curtimestamp, None, "ERROR2", None, None)
return (personid, relpath, modality, None, 0, None, curtimestamp, None, "ERROR2", None, None)

if len(results) == 1:
(oldfileid, oldsize, oldmtime, oldmd5, oldsync, oldversion) = results[0]
Expand Down Expand Up @@ -260,6 +267,7 @@ def _update_journal(root: FileSystemHelper, modalities: list[str],
journal_version = version if version is not None else time.strftime("%Y%m%d%H%M%S")
if amend: # get the last version
journal_version = JournalTable.get_latest_version(database_name=databasename)
journal_version = journal_version if journal_version is not None else time.strftime("%Y%m%d%H%M%S")

perf = perf_counter.PerformanceCounter()

Expand Down Expand Up @@ -324,6 +332,8 @@ def _update_journal(root: FileSystemHelper, modalities: list[str],
print("ERROR: File found but no metadata.", rlpath)
elif status == "ERROR3":
print("ERROR: File size is different but modtime is the same.", rlpath)
elif status == "ERROR4":
print("ERROR: File does not fit pattern.", rlpath)
elif status == "KEEP":
del modality_files_to_inactivate[myargs[1]]
else:
Expand Down

0 comments on commit 9231e8f

Please sign in to comment.