From 1b95baf69e7c9c623a2aaafbc083199e5a59e514 Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 7 Nov 2024 09:54:30 -0500 Subject: [PATCH 1/3] fix UUIDs not being unique --- data_management/services/dlu_filesystem.py | 4 ++-- data_management/services/dlu_management.py | 9 +++++---- data_management/watch_files.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/data_management/services/dlu_filesystem.py b/data_management/services/dlu_filesystem.py index f12ffff..6936e3d 100644 --- a/data_management/services/dlu_filesystem.py +++ b/data_management/services/dlu_filesystem.py @@ -29,12 +29,12 @@ def calculate_checksum(file_path: str): class DLUFile: def __init__(self, name: str, path: str, checksum: str, size: int, metadata: dict = {}, - file_id: str = str(uuid.uuid4())): + file_id: str = None): self.name = name self.path = path self.checksum = checksum self.size = size - self.file_id = file_id + self.file_id = file_id or str(uuid.uuid4()) self.metadata = metadata # Returns path without top directory, i.e. 
package dir or participant dir (bulk uploads) diff --git a/data_management/services/dlu_management.py b/data_management/services/dlu_management.py index 267b2f2..1b8a1b4 100644 --- a/data_management/services/dlu_management.py +++ b/data_management/services/dlu_management.py @@ -98,15 +98,16 @@ def update_dlu_package(self, package_id: str, fields_values: dict): query = "UPDATE data_manager_data_v SET " + query_info["set_clause"] + " WHERE dlu_package_id = %s" self.db.insert_data(query, values) - def insert_dlu_file(self, values): + def insert_dlu_file(self, dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, dlu_metadata): query = "INSERT INTO dlu_file (dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, dlu_metadata) VALUES(%s, %s, %s, %s, %s, %s)" - self.db.insert_data(query, values) - return query % values + return self.db.insert_data( + query, + (dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, dlu_metadata,),) def insert_dlu_files(self, package_id: str, file_list: List[DLUFile]): logger.info(f"Inserting files for package {package_id}") for file in file_list: - query_string = self.insert_dlu_file((file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata))) + query_string = self.insert_dlu_file(file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata)) logger.info(query_string) def get_ready_to_move(self, package_id: str): diff --git a/data_management/watch_files.py b/data_management/watch_files.py index 706c0d6..0b3ba13 100644 --- a/data_management/watch_files.py +++ b/data_management/watch_files.py @@ -79,7 +79,7 @@ def move_packages_to_DLU(self, packages): file_list = self.dlu_file_handler.match_files(top_level_subdir) else: file_list = self.dlu_file_handler.match_files(package_id) - + self.dlu_file_handler.copy_files(package_id, self.process_file_paths(directory_info.file_details)) self.dlu_file_handler.chown_dir(package_id, 
file_list) self.dlu_management.insert_dlu_files(package_id, file_list) From fdf2332bf6734bb92f2c6517a8b2adb6e0bc7571 Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 7 Nov 2024 10:19:44 -0500 Subject: [PATCH 2/3] remove this logging --- data_management/services/dlu_management.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data_management/services/dlu_management.py b/data_management/services/dlu_management.py index 1b8a1b4..6b6f40f 100644 --- a/data_management/services/dlu_management.py +++ b/data_management/services/dlu_management.py @@ -107,8 +107,7 @@ def insert_dlu_file(self, dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesiz def insert_dlu_files(self, package_id: str, file_list: List[DLUFile]): logger.info(f"Inserting files for package {package_id}") for file in file_list: - query_string = self.insert_dlu_file(file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata)) - logger.info(query_string) + self.insert_dlu_file(file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata)) def get_ready_to_move(self, package_id: str): package_record = self.db.get_data( From a9e09983cc07bf3d35c590ec887f7110a365fada Mon Sep 17 00:00:00 2001 From: dert1129 Date: Thu, 7 Nov 2024 10:23:26 -0500 Subject: [PATCH 3/3] revert changes in here --- data_management/services/dlu_management.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/data_management/services/dlu_management.py b/data_management/services/dlu_management.py index 6b6f40f..267b2f2 100644 --- a/data_management/services/dlu_management.py +++ b/data_management/services/dlu_management.py @@ -98,16 +98,16 @@ def update_dlu_package(self, package_id: str, fields_values: dict): query = "UPDATE data_manager_data_v SET " + query_info["set_clause"] + " WHERE dlu_package_id = %s" self.db.insert_data(query, values) - def insert_dlu_file(self, dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, 
dlu_metadata): + def insert_dlu_file(self, values): query = "INSERT INTO dlu_file (dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, dlu_metadata) VALUES(%s, %s, %s, %s, %s, %s)" - return self.db.insert_data( - query, - (dlu_fileName, dlu_package_id, dlu_file_id, dlu_filesize, dlu_md5checksum, dlu_metadata,),) + self.db.insert_data(query, values) + return query % values def insert_dlu_files(self, package_id: str, file_list: List[DLUFile]): logger.info(f"Inserting files for package {package_id}") for file in file_list: - self.insert_dlu_file(file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata)) + query_string = self.insert_dlu_file((file.name, package_id, file.file_id, file.size, file.checksum, json.dumps(file.metadata))) + logger.info(query_string) def get_ready_to_move(self, package_id: str): package_record = self.db.get_data(