diff --git a/chorus_upload/config.toml.template b/chorus_upload/config.toml.template index 54d4f40..ecd0f57 100644 --- a/chorus_upload/config.toml.template +++ b/chorus_upload/config.toml.template @@ -71,7 +71,7 @@ azure_sas_token = "sastoken" path = "/mnt/data/site" # path = "/mnt/data/site/pitts" - # if true, OMOP is a subdirectory of patient directory: "{patient_id}/OMOP/*.csv" + # if true, OMOP is a subdirectory of patient directory: "{patient_id:w}/OMOP/*.csv" # if false, OMOP is a sibling directory of patients: "OMOP/*.csv" # this will influence the central repo's content # default is false @@ -80,12 +80,13 @@ azure_sas_token = "sastoken" # indicates if the relative path is in a dated directory versioned = false - # default pattern for omop files. + # default pattern for omop files. + # use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w} pattern = "OMOP/{filepath}" - # pattern = "{patient_id:d}/OMOP_tables/{filepath}" + # pattern = "{patient_id:w}/OMOP_tables/{filepath}" # if omop_per_patient == true - # pattern = "{patient_id:d}/OMOP/{filepath}" - # pattern = "{version:d}/{patient_id:d}/OMOP_tables/{filepath}" + # pattern = "{patient_id:w}/OMOP/{filepath}" + # pattern = "{version:w}/{patient_id:w}/OMOP_tables/{filepath}" [site_path.Images] @@ -102,11 +103,11 @@ azure_sas_token = "sastoken" # indicates if the relative path is in a dated directory versioned = false - # pattern for the files. defaults to {patient_id:d}/Images/{filepath} - # :d indicates that only numeric id are supported. - pattern = "{patient_id:d}/Images/{filepath}" - # pattern = "Person{patient_id:d}/Images/{filepath}" - # pattern = "{version:d}/{patient_id:d}/Images/{filepath}" + # pattern for the files. defaults to {patient_id:w}/Images/{filepath} + # use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. 
{patient_id:w} + pattern = "{patient_id:w}/Images/{filepath}" + # pattern = "Person{patient_id:w}/Images/{filepath}" + # pattern = "{version:w}/{patient_id:w}/Images/{filepath}" [site_path.Waveforms] # this is the path of the source directory for waveforms @@ -115,11 +116,11 @@ azure_sas_token = "sastoken" # indicates if the relative path is in a dated directory versioned = false - # pattern for the files. defaults to {patient_id:d}/Waveforms/{filepath} - # :d indicates that only numeric id are supported. - pattern = "{patient_id:d}/Waveforms/{filepath}" - # pattern = "Person{patient_id:d}/Waveforms/{filepath}" - # pattern = "{version:d}/{patient_id:d}/Waveforms/{filepath}" + # pattern for the files. defaults to {patient_id:w}/Waveforms/{filepath} + # use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. {patient_id:w} + pattern = "{patient_id:w}/Waveforms/{filepath}" + # pattern = "Person{patient_id:w}/Waveforms/{filepath}" + # pattern = "{version:w}/{patient_id:w}/Waveforms/{filepath}" [site_path.Metadata] @@ -129,8 +130,8 @@ azure_sas_token = "sastoken" # indicates if the relative path is in a dated directory versioned = false - # pattern for the files. defaults to {patient_id:d}/Waveforms/{filepath} - # :d indicates that only numeric id are supported. - pattern = "{patient_id:d}/Waveforms/{filepath}" - # pattern = "Person{patient_id:d}/Waveforms/{filepath}" - # pattern = "{version:d}/{patient_id:d}/Waveforms/{filepath}" + # pattern for the files. defaults to {patient_id:w}/Waveforms/{filepath} + # use ":d" for digits only, or ":w" for letters, digits, and underscores, e.g. 
{patient_id:w} + pattern = "{patient_id:w}/Waveforms/{filepath}" + # pattern = "Person{patient_id:w}/Waveforms/{filepath}" + # pattern = "{version:w}/{patient_id:w}/Waveforms/{filepath}" diff --git a/chorus_upload/local_ops.py b/chorus_upload/local_ops.py index e08622a..6c1812a 100644 --- a/chorus_upload/local_ops.py +++ b/chorus_upload/local_ops.py @@ -127,13 +127,15 @@ def _gen_journal(root : FileSystemHelper, modalities: list[str], myargs = future.result() perf.add_file(myargs[4]) rlpath = myargs[1] - status = myargs[7] - if verbose and status == "ADDED": - print("INFO: ADDED ", rlpath) - else: - print(".", end="", flush=True) - - all_args.append(myargs) + status = myargs[8] + if status in ["ADDED", "MOVED", "UPDATED"]: + if verbose: + print("INFO: ADDED ", rlpath) + else: + print(".", end="", flush=True) + all_args.append(myargs) + elif status == "ERROR4": + print("INFO: File does not fit pattern.", rlpath) insert_count = JournalTable.insert_journal_entries(databasename, all_args) @@ -157,9 +159,14 @@ def _update_journal_one_file(root: FileSystemHelper, relpath:str, modality:str, # first item is personid. parsed = compiled_pattern.parse(relpath) - personid = parsed.named.get("patient_id", None) - version = version if version is not None else parsed.named.get("version", None) - + if parsed is not None: + personid = parsed.named.get("patient_id", None) + version = version if version is not None else parsed.named.get("version", None) + # print("DEBUG: Parsed ", relpath, " person id ", personid, "version", version) + else: + # print("Info: pattern not matched. 
skipping ", relpath) + return (None, relpath, modality, None, 0, None, curtimestamp, None, "ERROR4", None, None) + # matched = PERSONID_REGEX.match(relpath) # personid = matched.group(1) if matched else None @@ -170,10 +177,10 @@ def _update_journal_one_file(root: FileSystemHelper, relpath:str, modality:str, # There should only be 1 active file according to the path in a well-formed if (len(results) > 1): # print("ERROR: Multiple active files with that path - journal is not consistent") - return (personid, relpath, modality, None, None, None, curtimestamp, None, "ERROR1", None, None) + return (personid, relpath, modality, None, 0, None, curtimestamp, None, "ERROR1", None, None) if (len(results) == 0): # print("ERROR: File found but no metadata.", relpath) - return (personid, relpath, modality, None, None, None, curtimestamp, None, "ERROR2", None, None) + return (personid, relpath, modality, None, 0, None, curtimestamp, None, "ERROR2", None, None) if len(results) == 1: (oldfileid, oldsize, oldmtime, oldmd5, oldsync, oldversion) = results[0] @@ -260,6 +267,7 @@ def _update_journal(root: FileSystemHelper, modalities: list[str], journal_version = version if version is not None else time.strftime("%Y%m%d%H%M%S") if amend: # get the last version journal_version = JournalTable.get_latest_version(database_name=databasename) + journal_version = journal_version if journal_version is not None else time.strftime("%Y%m%d%H%M%S") perf = perf_counter.PerformanceCounter() @@ -324,6 +332,8 @@ def _update_journal(root: FileSystemHelper, modalities: list[str], print("ERROR: File found but no metadata.", rlpath) elif status == "ERROR3": print("ERROR: File size is different but modtime is the same.", rlpath) + elif status == "ERROR4": + print("ERROR: File does not fit pattern.", rlpath) elif status == "KEEP": del modality_files_to_inactivate[myargs[1]] else: