From 6fed6808814ff2c5b82ecf4ab1e56db060aad7a8 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 7 Jun 2024 17:02:05 -0400 Subject: [PATCH 01/23] add asyncio --- poetry.lock | 26 ++++++++++---------- pyproject.toml | 1 + schematic/store/synapse.py | 49 +++++++++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index 49550859a..46ff137d6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -190,6 +190,19 @@ files = [ [package.dependencies] typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} +[[package]] +name = "asyncio" +version = "3.4.3" +description = "reference implementation of PEP 3156" +optional = false +python-versions = "*" +files = [ + {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, + {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, + {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, + {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, +] + [[package]] name = "asyncio-atexit" version = "1.0.1" @@ -3350,7 +3363,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3358,16 +3370,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3384,7 +3388,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3392,7 +3395,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -4862,4 +4864,4 @@ aws = ["uWSGI"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.11" -content-hash = "5bf0c831977694ea541db24481181ec1980ec9589a2adbd9f30ed0fe7f2b2742" +content-hash = "fafb9420f9ac503b9e3a2f1c25321ea8eb59332eb82f01b418f7096def4488c5" diff --git a/pyproject.toml b/pyproject.toml index 03b88c81e..87f203b1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ Flask = {version = "2.1.3", optional = true} Flask-Cors = {version = "^3.0.10", optional = true} uWSGI = {version = "^2.0.21", optional = true} Jinja2 = {version = ">2.11.3", optional = true} +asyncio = "^3.4.3" [tool.poetry.extras] api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl"] diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index a15137ae8..b000833c3 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -68,6 +68,8 @@ from schematic.store.base import BaseStorage from schematic.exceptions import AccessCredentialsError from schematic.configuration.configuration import CONFIG +from synapseclient.annotations import Annotations +import asyncio logger = logging.getLogger("Synapse storage") @@ -1634,6 +1636,33 @@ def _add_annotations( self.syn.set_annotations(annos) return + async def _add_annotations_async( + self, + dmge: DataModelGraphExplorer, + row: pd.Series, + entityId: str, + hideBlanks: bool, + annotation_keys: str, + ) -> None: + """add annotations to entity ids in an asynchronous way + + Args: + dmge: DataModelGraphExplorer object, + row: current row of manifest being processed + entityId (str): synapseId of entity to add annotations to + hideBlanks: Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false. + annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display + name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain + display label formatting while ensuring the label is formatted properly for Synapse annotations. + """ + # Format annotations for Synapse + annos = self.format_row_annotations( + dmge, row, entityId, hideBlanks, annotation_keys + ) + + if annos: + await annos.store_async() + def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. Args: @@ -1653,7 +1682,7 @@ def _create_entity_id(self, idx, row, manifest, datasetId): manifest.loc[idx, "entityId"] = entityId return manifest, entityId - def add_annotations_to_entities_files( + async def add_annotations_to_entities_files( self, dmge, manifest, @@ -1694,6 +1723,7 @@ def add_annotations_to_entities_files( ).drop("entityId_x", axis=1) # Fill `entityId` for each row if missing and annotate entity as appropriate + set_annotations_requests=[] for idx, row in manifest.iterrows(): if not row["entityId"] and ( manifest_record_type == "file_and_entities" @@ -1713,8 +1743,11 @@ def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - self._add_annotations(dmge, row, entityId, hideBlanks, annotation_keys) + #self._add_annotations(dmge, row, entityId, hideBlanks, annotation_keys) + set_annotations_requests.append(asyncio.create_task(self._add_annotations_async(dmge, row, entityId, hideBlanks, annotation_keys))) logger.info(f"Added annotations to entity: {entityId}") + # execute all requests of setting annotations + responses = await asyncio.gather(*set_annotations_requests, return_exceptions=True) return manifest def upload_manifest_as_table( @@ -1767,7 +1800,7 @@ def upload_manifest_as_table( ) if file_annotations_upload: - manifest = self.add_annotations_to_entities_files( + manifest = asyncio.run(self.add_annotations_to_entities_files( dmge, manifest, manifest_record_type, @@ -1775,7 +1808,7 @@ def upload_manifest_as_table( hideBlanks, manifest_synapse_table_id, annotation_keys, - ) + )) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( manifest, @@ -1840,14 +1873,14 @@ def upload_manifest_as_csv( manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ if file_annotations_upload: - manifest = self.add_annotations_to_entities_files( + manifest = asyncio.run(self.add_annotations_to_entities_files( dmge, manifest, manifest_record_type, datasetId, hideBlanks, annotation_keys=annotation_keys, - ) + )) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1917,7 +1950,7 @@ def upload_manifest_combo( ) if file_annotations_upload: - manifest = self.add_annotations_to_entities_files( + manifest = asyncio.run(self.add_annotations_to_entities_files( dmge, manifest, manifest_record_type, @@ -1925,7 +1958,7 @@ def upload_manifest_combo( hideBlanks, manifest_synapse_table_id, annotation_keys=annotation_keys, - ) + )) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( From ce840e0e348924801e3bc61405824e13c78b3133 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 10 Jun 2024 11:24:54 -0400 Subject: [PATCH 02/23] temp --- schematic/store/synapse.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index b000833c3..735479cab 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -68,7 +68,7 @@ from schematic.store.base import BaseStorage from schematic.exceptions import AccessCredentialsError from schematic.configuration.configuration import CONFIG -from synapseclient.annotations import Annotations +from synapseclient.models.annotations import Annotations import asyncio logger = logging.getLogger("Synapse storage") @@ -702,7 +702,6 @@ def fill_in_entity_id_filename( new_files = self._get_file_entityIds( dataset_files=dataset_files, only_new_files=True, manifest=manifest ) - # update manifest so that it contains new dataset files new_files = pd.DataFrame(new_files) manifest = ( @@ -1370,7 +1369,16 @@ def format_row_annotations( metadataSyn[keySyn] = v # set annotation(s) for the various objects/items in a dataset on Synapse + print('entity id to get annotations', entityId) annos = self.syn.get_annotations(entityId) + annos_new = Annotations.from_dict(synapse_annotations=annos) + print('annos', annos) + print('annos new', annos_new) + + print(type(annos_new)) + breakpoint() + + csv_list_regex = comma_separated_list_regex() for anno_k, anno_v in metadataSyn.items(): # Remove keys with nan or empty string values from dict of annotations to be uploaded @@ -1748,6 +1756,10 @@ async def add_annotations_to_entities_files( logger.info(f"Added annotations to entity: {entityId}") # execute all requests of setting annotations responses = await asyncio.gather(*set_annotations_requests, return_exceptions=True) + + # handle errors + for response in responses: + print('repsonse', response) return manifest def upload_manifest_as_table( From bf8d8d4707b94ddcbc12b4fd7f7ae6348def8a94 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 10 Jun 2024 15:36:34 -0400 Subject: [PATCH 03/23] store annotations async --- schematic/store/synapse.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 735479cab..e62294861 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1369,16 +1369,7 @@ def format_row_annotations( metadataSyn[keySyn] = v # set annotation(s) for the various objects/items in a dataset on Synapse - print('entity id to get annotations', entityId) annos = self.syn.get_annotations(entityId) - annos_new = Annotations.from_dict(synapse_annotations=annos) - print('annos', annos) - print('annos new', annos_new) - - print(type(annos_new)) - breakpoint() - - csv_list_regex = comma_separated_list_regex() for anno_k, anno_v in metadataSyn.items(): # Remove keys with nan or empty string values from dict of annotations to be uploaded @@ -1667,9 +1658,9 @@ async def _add_annotations_async( annos = self.format_row_annotations( dmge, row, entityId, hideBlanks, annotation_keys ) - if annos: - await annos.store_async() + annotation_class=Annotations(annotations=dict(annos), id=annos.id, etag=annos.etag) + await annotation_class.store_async(synapse_client=self.syn) def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. @@ -1759,7 +1750,8 @@ async def add_annotations_to_entities_files( # handle errors for response in responses: - print('repsonse', response) + if response: + logger.error(response) return manifest def upload_manifest_as_table( From 02ff5e927e985354f4559b6ef0fc9fdd9a244f0a Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 11 Jun 2024 12:00:53 -0400 Subject: [PATCH 04/23] add and store annos in an async way --- schematic/store/synapse.py | 89 +++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index e62294861..d3971d872 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -11,6 +11,7 @@ import secrets import shutil import synapseclient +from synapseclient.api import get_entity_id_bundle2 import uuid # used to generate unique names for entities from tenacity import ( @@ -1335,10 +1336,26 @@ def upload_manifest_file( ) return manifest_synapse_file_id + + async def get_async_annotation(self, synapse_id): + return await get_entity_id_bundle2( + entity_id=synapse_id, request={"includeAnnotations": True}, synapse_client=self.syn + ) + + async def store_async_annotation(self, annotation_dict) -> Annotations: + annotation_data = Annotations.from_dict( + synapse_annotations=annotation_dict["annotations"]["annotations"] + ) + annotation_class = Annotations( + annotations=annotation_data, + etag=annotation_dict["annotations"]["etag"], + id=annotation_dict["annotations"]["id"], + ) + return await annotation_class.store_async(self.syn) @missing_entity_handler def format_row_annotations( - self, dmge, row, entityId: str, hideBlanks: bool, annotation_keys: str + self, dmge, row, annos: dict, hideBlanks: bool, annotation_keys: str ): # prepare metadata for Synapse storage (resolve display name into a name that Synapse annotations support (e.g no spaces, parenthesis) # note: the removal of special characters, will apply only to annotation keys; we are not altering the manifest @@ -1369,7 +1386,6 @@ def format_row_annotations( metadataSyn[keySyn] = v # set annotation(s) for the various objects/items in a dataset on Synapse - annos = self.syn.get_annotations(entityId) csv_list_regex = comma_separated_list_regex() for anno_k, anno_v in metadataSyn.items(): # Remove keys with nan or empty string values from dict of annotations to be uploaded @@ -1635,32 +1651,42 @@ def _add_annotations( self.syn.set_annotations(annos) return - async def _add_annotations_async( - self, - dmge: DataModelGraphExplorer, - row: pd.Series, - entityId: str, - hideBlanks: bool, - annotation_keys: str, - ) -> None: - """add annotations to entity ids in an asynchronous way + async def _get_store_annotations_async(self, entityId:str, dmge: DataModelGraphExplorer, row:pd.Series, hideBlanks:bool, annotation_keys:str) -> None: + """store annotations in an async way Args: - dmge: DataModelGraphExplorer object, - row: current row of manifest being processed - entityId (str): synapseId of entity to add annotations to - hideBlanks: Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false. - annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display - name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain - display label formatting while ensuring the label is formatted properly for Synapse annotations. - """ - # Format annotations for Synapse - annos = self.format_row_annotations( - dmge, row, entityId, hideBlanks, annotation_keys - ) - if annos: - annotation_class=Annotations(annotations=dict(annos), id=annos.id, etag=annos.etag) - await annotation_class.store_async(synapse_client=self.syn) + entityId (synapse entity id): synapse entity id + dmge (DataModelGraphExplorer): data model graph explorer + row (pd.Series): pandas series + hideBlanks (bool): if true, does not upload annotation keys with blank values. If false, Uploads Annotation keys with empty string values. + annotation_keys (str): annotation keys, default to "class_label" + """ + # get annotations asynchronously + requests = set() + get_annos = asyncio.create_task(self.get_async_annotation(entityId)) + requests.add(get_annos) + + while requests: + done_tasks, pending_tasks = await asyncio.wait( + requests, return_when=asyncio.FIRST_COMPLETED + ) + requests = pending_tasks + # after the task of getting annotation gets completed, + # store annotations + for completed_task in done_tasks: + try: + annos = completed_task.result() + + if isinstance(annos, Annotations): + logger.info("Successfully stored annotations: {annos}") + else: + # remove special characters in annotations + annos = self.format_row_annotations(dmge, row, annos, hideBlanks, annotation_keys) + requests.add( + asyncio.create_task(self.store_async_annotation(annotation_dict=annos)) + ) + except Exception as e: + logger.error(f"failed with { repr(e) }.") def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. @@ -1722,7 +1748,6 @@ async def add_annotations_to_entities_files( ).drop("entityId_x", axis=1) # Fill `entityId` for each row if missing and annotate entity as appropriate - set_annotations_requests=[] for idx, row in manifest.iterrows(): if not row["entityId"] and ( manifest_record_type == "file_and_entities" @@ -1742,16 +1767,8 @@ async def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - #self._add_annotations(dmge, row, entityId, hideBlanks, annotation_keys) - set_annotations_requests.append(asyncio.create_task(self._add_annotations_async(dmge, row, entityId, hideBlanks, annotation_keys))) + await self._get_store_annotations_async(entityId=entityId, dmge=dmge, row=row, hideBlanks=hideBlanks, annotation_keys=annotation_keys) logger.info(f"Added annotations to entity: {entityId}") - # execute all requests of setting annotations - responses = await asyncio.gather(*set_annotations_requests, return_exceptions=True) - - # handle errors - for response in responses: - if response: - logger.error(response) return manifest def upload_manifest_as_table( From 1fb602431aed6218a80716b4a0baf3e57c266f4d Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 11 Jun 2024 12:13:44 -0400 Subject: [PATCH 05/23] add type hinting --- schematic/store/synapse.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index d3971d872..e5a059139 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -24,7 +24,7 @@ from time import sleep # allows specifying explicit variable types -from typing import Dict, List, Tuple, Sequence, Union, Optional +from typing import Dict, List, Tuple, Sequence, Union, Optional, Any from synapseclient import ( Synapse, @@ -1337,12 +1337,29 @@ def upload_manifest_file( return manifest_synapse_file_id - async def get_async_annotation(self, synapse_id): + async def get_async_annotation(self, synapse_id: str) -> Dict[str, Any]: + """get annotations asynchronously + + Args: + synapse_id (str): synapse id of the entity that the annotation belongs + + Returns: + Dict[str, Any]: The requested entity bundle matching + + """ return await get_entity_id_bundle2( entity_id=synapse_id, request={"includeAnnotations": True}, synapse_client=self.syn ) - async def store_async_annotation(self, annotation_dict) -> Annotations: + async def store_async_annotation(self, annotation_dict: dict) -> Annotations: + """store annotation in an async way + + Args: + annotation_dict (dict): annotation in a dictionary format + + Returns: + Annotations: The stored annotations. + """ annotation_data = Annotations.from_dict( synapse_annotations=annotation_dict["annotations"]["annotations"] ) From 91336c9e61d624b078068cd03b9b7e99728f40f1 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 11 Jun 2024 12:16:54 -0400 Subject: [PATCH 06/23] run black --- schematic/store/synapse.py | 105 +++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 40 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index e5a059139..53cf2b7bb 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1336,21 +1336,23 @@ def upload_manifest_file( ) return manifest_synapse_file_id - + async def get_async_annotation(self, synapse_id: str) -> Dict[str, Any]: """get annotations asynchronously Args: - synapse_id (str): synapse id of the entity that the annotation belongs + synapse_id (str): synapse id of the entity that the annotation belongs Returns: Dict[str, Any]: The requested entity bundle matching """ return await get_entity_id_bundle2( - entity_id=synapse_id, request={"includeAnnotations": True}, synapse_client=self.syn + entity_id=synapse_id, + request={"includeAnnotations": True}, + synapse_client=self.syn, ) - + async def store_async_annotation(self, annotation_dict: dict) -> Annotations: """store annotation in an async way @@ -1368,7 +1370,7 @@ async def store_async_annotation(self, annotation_dict: dict) -> Annotations: etag=annotation_dict["annotations"]["etag"], id=annotation_dict["annotations"]["id"], ) - return await annotation_class.store_async(self.syn) + return await annotation_class.store_async(self.syn) @missing_entity_handler def format_row_annotations( @@ -1668,17 +1670,24 @@ def _add_annotations( self.syn.set_annotations(annos) return - async def _get_store_annotations_async(self, entityId:str, dmge: DataModelGraphExplorer, row:pd.Series, hideBlanks:bool, annotation_keys:str) -> None: + async def _get_store_annotations_async( + self, + entityId: str, + dmge: DataModelGraphExplorer, + row: pd.Series, + hideBlanks: bool, + annotation_keys: str, + ) -> None: """store annotations in an async way Args: entityId (synapse entity id): synapse entity id dmge (DataModelGraphExplorer): data model graph explorer - row (pd.Series): pandas series - hideBlanks (bool): if true, does not upload annotation keys with blank values. If false, Uploads Annotation keys with empty string values. + row (pd.Series): pandas series + hideBlanks (bool): if true, does not upload annotation keys with blank values. If false, Uploads Annotation keys with empty string values. annotation_keys (str): annotation keys, default to "class_label" """ - # get annotations asynchronously + # get annotations asynchronously requests = set() get_annos = asyncio.create_task(self.get_async_annotation(entityId)) requests.add(get_annos) @@ -1688,7 +1697,7 @@ async def _get_store_annotations_async(self, entityId:str, dmge: DataModelGraphE requests, return_when=asyncio.FIRST_COMPLETED ) requests = pending_tasks - # after the task of getting annotation gets completed, + # after the task of getting annotation gets completed, # store annotations for completed_task in done_tasks: try: @@ -1698,12 +1707,16 @@ async def _get_store_annotations_async(self, entityId:str, dmge: DataModelGraphE logger.info("Successfully stored annotations: {annos}") else: # remove special characters in annotations - annos = self.format_row_annotations(dmge, row, annos, hideBlanks, annotation_keys) + annos = self.format_row_annotations( + dmge, row, annos, hideBlanks, annotation_keys + ) requests.add( - asyncio.create_task(self.store_async_annotation(annotation_dict=annos)) + asyncio.create_task( + self.store_async_annotation(annotation_dict=annos) + ) ) except Exception as e: - logger.error(f"failed with { repr(e) }.") + logger.error(f"failed with { repr(e) }.") def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. @@ -1784,7 +1797,13 @@ async def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - await self._get_store_annotations_async(entityId=entityId, dmge=dmge, row=row, hideBlanks=hideBlanks, annotation_keys=annotation_keys) + await self._get_store_annotations_async( + entityId=entityId, + dmge=dmge, + row=row, + hideBlanks=hideBlanks, + annotation_keys=annotation_keys, + ) logger.info(f"Added annotations to entity: {entityId}") return manifest @@ -1838,15 +1857,17 @@ def upload_manifest_as_table( ) if file_annotations_upload: - manifest = asyncio.run(self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys, - )) + manifest = asyncio.run( + self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys, + ) + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( manifest, @@ -1911,14 +1932,16 @@ def upload_manifest_as_csv( manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ if file_annotations_upload: - manifest = asyncio.run(self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - annotation_keys=annotation_keys, - )) + manifest = asyncio.run( + self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + annotation_keys=annotation_keys, + ) + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1988,15 +2011,17 @@ def upload_manifest_combo( ) if file_annotations_upload: - manifest = asyncio.run(self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys=annotation_keys, - )) + manifest = asyncio.run( + self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys=annotation_keys, + ) + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( From e62597b4fc42e152e8b644262b21ac112b1ba9bf Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 12 Jun 2024 23:18:21 -0400 Subject: [PATCH 07/23] delete unused funct --- schematic/store/synapse.py | 91 ++++++++++++++------------------------ 1 file changed, 33 insertions(+), 58 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 53cf2b7bb..4a80903f5 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -71,6 +71,7 @@ from schematic.configuration.configuration import CONFIG from synapseclient.models.annotations import Annotations import asyncio +from dataclasses import asdict logger = logging.getLogger("Synapse storage") @@ -1373,8 +1374,8 @@ async def store_async_annotation(self, annotation_dict: dict) -> Annotations: return await annotation_class.store_async(self.syn) @missing_entity_handler - def format_row_annotations( - self, dmge, row, annos: dict, hideBlanks: bool, annotation_keys: str + async def format_row_annotations( + self, dmge, row, entityId: str, hideBlanks: bool, annotation_keys: str ): # prepare metadata for Synapse storage (resolve display name into a name that Synapse annotations support (e.g no spaces, parenthesis) # note: the removal of special characters, will apply only to annotation keys; we are not altering the manifest @@ -1405,6 +1406,8 @@ def format_row_annotations( metadataSyn[keySyn] = v # set annotation(s) for the various objects/items in a dataset on Synapse + annos = await self.get_async_annotation(entityId) + csv_list_regex = comma_separated_list_regex() for anno_k, anno_v in metadataSyn.items(): # Remove keys with nan or empty string values from dict of annotations to be uploaded @@ -1670,54 +1673,6 @@ def _add_annotations( self.syn.set_annotations(annos) return - async def _get_store_annotations_async( - self, - entityId: str, - dmge: DataModelGraphExplorer, - row: pd.Series, - hideBlanks: bool, - annotation_keys: str, - ) -> None: - """store annotations in an async way - - Args: - entityId (synapse entity id): synapse entity id - dmge (DataModelGraphExplorer): data model graph explorer - row (pd.Series): pandas series - hideBlanks (bool): if true, does not upload annotation keys with blank values. If false, Uploads Annotation keys with empty string values. - annotation_keys (str): annotation keys, default to "class_label" - """ - # get annotations asynchronously - requests = set() - get_annos = asyncio.create_task(self.get_async_annotation(entityId)) - requests.add(get_annos) - - while requests: - done_tasks, pending_tasks = await asyncio.wait( - requests, return_when=asyncio.FIRST_COMPLETED - ) - requests = pending_tasks - # after the task of getting annotation gets completed, - # store annotations - for completed_task in done_tasks: - try: - annos = completed_task.result() - - if isinstance(annos, Annotations): - logger.info("Successfully stored annotations: {annos}") - else: - # remove special characters in annotations - annos = self.format_row_annotations( - dmge, row, annos, hideBlanks, annotation_keys - ) - requests.add( - asyncio.create_task( - self.store_async_annotation(annotation_dict=annos) - ) - ) - except Exception as e: - logger.error(f"failed with { repr(e) }.") - def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. Args: @@ -1778,6 +1733,7 @@ async def add_annotations_to_entities_files( ).drop("entityId_x", axis=1) # Fill `entityId` for each row if missing and annotate entity as appropriate + requests=set() for idx, row in manifest.iterrows(): if not row["entityId"] and ( manifest_record_type == "file_and_entities" @@ -1797,14 +1753,33 @@ async def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - await self._get_store_annotations_async( - entityId=entityId, - dmge=dmge, - row=row, - hideBlanks=hideBlanks, - annotation_keys=annotation_keys, - ) - logger.info(f"Added annotations to entity: {entityId}") + + # Format annotations for Synapse + annos_task = asyncio.create_task(self.format_row_annotations( + dmge, row, entityId, hideBlanks, annotation_keys + )) + requests.add(annos_task) + + while requests: + done_tasks, pending_tasks = await asyncio.wait(requests, return_when=asyncio.FIRST_COMPLETED) + requests = pending_tasks + + for completed_task in done_tasks: + try: + annos = completed_task.result() + + if isinstance(annos, Annotations): + annos_dict = asdict(annos) + entity_id = annos_dict["id"] + logger.info(f"Successfully stored annotations for {entity_id}") + else: + # remove special characters in annotations + entity_id = annos["EntityId"] + logger.info(f"Got annotations for {entity_id} entity") + requests.add(asyncio.create_task(self.store_async_annotation(annotation_dict=annos))) + + except Exception as e: + raise RuntimeError(f"failed with { repr(e) }.") return manifest def upload_manifest_as_table( From 33c33eccdd04c982ac867ce648d701579782d977 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 12 Jun 2024 23:35:47 -0400 Subject: [PATCH 08/23] preserve current behavior; add annos --- schematic/store/synapse.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 4a80903f5..ebdbf8de1 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1775,8 +1775,9 @@ async def add_annotations_to_entities_files( else: # remove special characters in annotations entity_id = annos["EntityId"] - logger.info(f"Got annotations for {entity_id} entity") - requests.add(asyncio.create_task(self.store_async_annotation(annotation_dict=annos))) + logger.info(f"Obtained and processed annotations for {entity_id} entity") + if annos: + requests.add(asyncio.create_task(self.store_async_annotation(annotation_dict=annos))) except Exception as e: raise RuntimeError(f"failed with { repr(e) }.") From a09ea37a981b45fba89fb5db7071c878be5cfa1b Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 07:30:15 -0400 Subject: [PATCH 09/23] run black --- schematic/store/synapse.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index ebdbf8de1..ac18fc1ff 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1733,7 +1733,7 @@ async def add_annotations_to_entities_files( ).drop("entityId_x", axis=1) # Fill `entityId` for each row if missing and annotate entity as appropriate - requests=set() + requests = set() for idx, row in manifest.iterrows(): if not row["entityId"] and ( manifest_record_type == "file_and_entities" @@ -1753,15 +1753,18 @@ async def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - # Format annotations for Synapse - annos_task = asyncio.create_task(self.format_row_annotations( - dmge, row, entityId, hideBlanks, annotation_keys - )) + annos_task = asyncio.create_task( + self.format_row_annotations( + dmge, row, entityId, hideBlanks, annotation_keys + ) + ) requests.add(annos_task) while requests: - done_tasks, pending_tasks = await asyncio.wait(requests, return_when=asyncio.FIRST_COMPLETED) + done_tasks, pending_tasks = await asyncio.wait( + requests, return_when=asyncio.FIRST_COMPLETED + ) requests = pending_tasks for completed_task in done_tasks: @@ -1771,13 +1774,23 @@ async def add_annotations_to_entities_files( if isinstance(annos, Annotations): annos_dict = asdict(annos) entity_id = annos_dict["id"] - logger.info(f"Successfully stored annotations for {entity_id}") + logger.info( + f"Successfully stored annotations for {entity_id}" + ) else: # remove special characters in annotations entity_id = annos["EntityId"] - logger.info(f"Obtained and processed annotations for {entity_id} entity") + logger.info( + f"Obtained and processed annotations for {entity_id} entity" + ) if annos: - requests.add(asyncio.create_task(self.store_async_annotation(annotation_dict=annos))) + requests.add( + asyncio.create_task( + self.store_async_annotation( + annotation_dict=annos + ) + ) + ) except Exception as e: raise RuntimeError(f"failed with { repr(e) }.") From 56a64c31558b5127e19f982db5d5adf56600ac79 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 09:15:53 -0400 Subject: [PATCH 10/23] update to use synapseclient 4.3.0 --- poetry.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 46ff137d6..9295cc34f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4331,13 +4331,13 @@ Jinja2 = ">=2.0" [[package]] name = "synapseclient" -version = "4.2.0" +version = "4.3.0" description = "A client for Synapse, a collaborative, open-source research platform that allows teams to share data, track analyses, and collaborate." optional = false python-versions = ">=3.8" files = [ - {file = "synapseclient-4.2.0-py3-none-any.whl", hash = "sha256:ab5bc9c2bf5b90f271f1a9478eff7e9fca3e573578401ac706383ddb984d7a13"}, - {file = "synapseclient-4.2.0.tar.gz", hash = "sha256:89222661125de1795b1a096cf8c58b8115c19d6b0fa5846ed2a41cdb394ef773"}, + {file = "synapseclient-4.3.0-py3-none-any.whl", hash = "sha256:5d8107cfff4031a0a46d60a3c9a8120300190fa27df4983d883dc951d8bd885f"}, + {file = "synapseclient-4.3.0.tar.gz", hash = "sha256:a1149a64b3281669d42c69e210677a902478b8f6b302966d518473c7384f6387"}, ] [package.dependencies] @@ -4357,11 +4357,11 @@ urllib3 = ">=1.26.18,<2" [package.extras] boto3 = ["boto3 (>=1.7.0,<2.0)"] -dev = ["black", "flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pre-commit", "pytest (>=6.0.0,<7.0)", "pytest-asyncio (>=0.19,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] +dev = ["black", "flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pandas (>=1.5,<3.0)", "pre-commit", "pytest (>=7.0.0,<8.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] docs = ["markdown-include (>=0.8.1,<0.9.0)", "mkdocs (>=1.5.3)", "mkdocs-material (>=9.4.14)", "mkdocs-open-in-new-tab (>=1.0.3,<1.1.0)", "mkdocstrings (>=0.24.0)", "mkdocstrings-python (>=1.7.5)", "termynal (>=0.11.1)"] pandas = ["pandas (>=1.5,<3.0)"] pysftp = ["pysftp (>=0.2.8,<0.3)"] -tests = ["flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pytest (>=6.0.0,<7.0)", "pytest-asyncio (>=0.19,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] +tests = ["flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pandas (>=1.5,<3.0)", "pytest (>=7.0.0,<8.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] [[package]] name = "tabulate" From d32bbbd03b4d58fed0cbbd678f2bda8c86acc1a0 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 09:18:13 -0400 Subject: [PATCH 11/23] update synapseclient veresion --- poetry.lock | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9295cc34f..5a2d51e75 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4864,4 +4864,4 @@ aws = ["uWSGI"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.11" -content-hash = "fafb9420f9ac503b9e3a2f1c25321ea8eb59332eb82f01b418f7096def4488c5" +content-hash = "c47e0d03588f80b3f5b9ed8249317e45caf6ddccc4a301c76bcee099151605f2" diff --git a/pyproject.toml b/pyproject.toml index 87f203b1d..150508e81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ pygsheets = "^2.0.4" PyYAML = "^6.0.0" rdflib = "^6.0.0" setuptools = "^66.0.0" -synapseclient = "^4.1.0" +synapseclient = "^4.3.0" tenacity = "^8.0.1" toml = "^0.10.2" great-expectations = "^0.15.0" From 16e62d41f8c95a11b0a3f37ecc1bf5e8b4aec0db Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 13:10:26 -0400 Subject: [PATCH 12/23] build separate function;raise error --- schematic/store/synapse.py | 68 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index ac18fc1ff..eec31e384 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1692,6 +1692,40 @@ def _create_entity_id(self, idx, row, manifest, datasetId): manifest.loc[idx, "entityId"] = entityId return manifest, entityId + async def _store_annos(self, requests): + while requests: + done_tasks, pending_tasks = await asyncio.wait( + requests, return_when=asyncio.FIRST_COMPLETED + ) + requests = pending_tasks + + for completed_task in done_tasks: + try: + annos = completed_task.result() + + if isinstance(annos, Annotations): + annos_dict = asdict(annos) + entity_id = annos_dict["id"] + logger.info( + f"Successfully stored annotations for {entity_id}" + ) + else: + # remove special characters in annotations + entity_id = annos["EntityId"] + logger.info( + f"Obtained and processed annotations for {entity_id} entity" + ) + if annos: + requests.add( + asyncio.create_task( + self.store_async_annotation( + annotation_dict=annos + ) + ) + ) + except Exception as e: + raise RuntimeError(f"failed with { repr(e) }.") from e + async def add_annotations_to_entities_files( self, dmge, @@ -1760,40 +1794,8 @@ async def add_annotations_to_entities_files( ) ) requests.add(annos_task) + self._store_annos(requests) - while requests: - done_tasks, pending_tasks = await asyncio.wait( - requests, return_when=asyncio.FIRST_COMPLETED - ) - requests = pending_tasks - - for completed_task in done_tasks: - try: - annos = completed_task.result() - - if isinstance(annos, Annotations): - annos_dict = asdict(annos) - entity_id = annos_dict["id"] - logger.info( - f"Successfully stored annotations for {entity_id}" - ) - else: - # remove special characters in annotations - entity_id = annos["EntityId"] - logger.info( - f"Obtained and processed annotations for {entity_id} entity" - ) - if annos: - requests.add( - asyncio.create_task( - self.store_async_annotation( - annotation_dict=annos - ) - ) - ) - - except Exception as e: - raise RuntimeError(f"failed with { repr(e) }.") return manifest def upload_manifest_as_table( From 38249640148c9019dc42dedb4c332f76dc956adb Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 14:14:08 -0400 Subject: [PATCH 13/23] use pytest asyncio --- poetry.lock | 20 +++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 5a2d51e75..8761b052f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3193,6 +3193,24 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.23.7" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, + {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + [[package]] name = "pytest-cov" version = "4.1.0" @@ -4864,4 +4882,4 @@ aws = ["uWSGI"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.11" -content-hash = "c47e0d03588f80b3f5b9ed8249317e45caf6ddccc4a301c76bcee099151605f2" +content-hash = "a3048c0808e73fd19f5175897e9dda47a2a593422dd4744886615ac453a42139" diff --git a/pyproject.toml b/pyproject.toml index 150508e81..aa0939b5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ Flask-Cors = {version = "^3.0.10", optional = true} uWSGI = {version = "^2.0.21", optional = true} Jinja2 = {version = ">2.11.3", optional = true} asyncio = "^3.4.3" +pytest-asyncio = "^0.23.7" [tool.poetry.extras] api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl"] From 1efb1349b6a90098db284f3c07da3b385798a87d Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 13 Jun 2024 17:45:19 -0400 Subject: [PATCH 14/23] add test --- tests/test_store.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_store.py b/tests/test_store.py index 06fa4bf23..88e44e923 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -28,6 +28,7 @@ SynapseStorage ) from schematic.utils.general import check_synapse_cache_size +from unittest.mock import AsyncMock logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -483,6 +484,20 @@ def test_get_files_metadata_from_dataset(self, synapse_store): "entityId": ["syn123", "syn456"], } + async def test_get_async_annotation(self, synapse_store): + mock_syn_id = "syn1234" + + with patch("schematic.store.synapse.get_entity_id_bundle2", new_callable=AsyncMock, return_value="mock") as mock_get_entity_id_bundle2: + mock_get_entity_id_bundle2.return_value="mock" + result = await synapse_store.get_async_annotation(synapse_id=mock_syn_id) + + mock_get_entity_id_bundle2.assert_called_once_with( + entity_id=mock_syn_id, + request={"includeAnnotations": True}, + synapse_client=synapse_store.syn, + ) + assert result == "mock" + class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): From 2e53a3335a72731734455fe70cfd6ceca8ad4f8b Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 14 Jun 2024 00:11:13 -0400 Subject: [PATCH 15/23] fix store_annos --- schematic/store/synapse.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index eec31e384..91aa602bb 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1695,8 +1695,8 @@ def _create_entity_id(self, idx, row, manifest, datasetId): async def _store_annos(self, requests): while requests: done_tasks, pending_tasks = await asyncio.wait( - requests, return_when=asyncio.FIRST_COMPLETED - ) + requests, return_when=asyncio.FIRST_COMPLETED + ) requests = pending_tasks for completed_task in done_tasks: @@ -1706,9 +1706,7 @@ async def _store_annos(self, requests): if isinstance(annos, Annotations): annos_dict = asdict(annos) entity_id = annos_dict["id"] - logger.info( - f"Successfully stored annotations for {entity_id}" - ) + logger.info(f"Successfully stored annotations for {entity_id}") else: # remove special characters in annotations entity_id = annos["EntityId"] @@ -1718,9 +1716,7 @@ async def _store_annos(self, requests): if annos: requests.add( asyncio.create_task( - self.store_async_annotation( - annotation_dict=annos - ) + self.store_async_annotation(annotation_dict=annos) ) ) except Exception as e: @@ -1794,8 +1790,7 @@ async def add_annotations_to_entities_files( ) ) requests.add(annos_task) - self._store_annos(requests) - + await self._store_annos(requests) return manifest def upload_manifest_as_table( From f1576696826bfd1f0e27290412227d8f9b6958c9 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 14 Jun 2024 06:43:44 -0400 Subject: [PATCH 16/23] add test_get_async_annotation --- tests/test_store.py | 71 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 15 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 88e44e923..29e91331b 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -22,13 +22,10 @@ from tests.conftest import Helpers from schematic.store.base import BaseStorage -from schematic.store.synapse import ( - DatasetFileView, - ManifestDownload, - SynapseStorage -) +from schematic.store.synapse import DatasetFileView, ManifestDownload, SynapseStorage from schematic.utils.general import check_synapse_cache_size from unittest.mock import AsyncMock +from synapseclient.models import Annotations logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -132,7 +129,7 @@ def test_init(self): class TestSynapseStorage: "Tests the SynapseStorage class" - def test_init(self, synapse_store:SynapseStorage) -> None: + def test_init(self, synapse_store: SynapseStorage) -> None: """Tests SynapseStorage.__init__""" assert synapse_store.storageFileview == "syn23643253" assert isinstance(synapse_store.storageFileviewTable, pd.DataFrame) @@ -143,8 +140,7 @@ def test__purge_synapse_cache(self) -> None: synapse_store = SynapseStorage(synapse_cache_path="test_cache_dir") size_before_purge = check_synapse_cache_size(synapse_store.root_synapse_cache) synapse_store._purge_synapse_cache( - maximum_storage_allowed_cache_gb=0.000001, - minute_buffer=0 + maximum_storage_allowed_cache_gb=0.000001, minute_buffer=0 ) size_after_purge = check_synapse_cache_size(synapse_store.root_synapse_cache) assert size_before_purge > size_after_purge @@ -158,7 +154,7 @@ def test_login(self) -> None: assert synapse_client.cache.cache_root_dir == "test_cache_dir" shutil.rmtree("test_cache_dir") - def test_getFileAnnotations(self, synapse_store:SynapseStorage) -> None: + def test_getFileAnnotations(self, synapse_store: SynapseStorage) -> None: expected_dict = { "author": "bruno, milen, sujay", "impact": "42.9", @@ -221,17 +217,17 @@ def test_get_file_entityIds(self, helpers, synapse_store, only_new_files): {"CheckInt": "7", "CheckList": "valid, list, values"}, "syn34295552", "file_and_entities", - "annotations_test_manifest.csv" + "annotations_test_manifest.csv", ), ( {"FileFormat": "BAM", "GenomeBuild": "GRCh38"}, "syn39241199", "table_and_file", - "test_BulkRNAseq.csv" + "test_BulkRNAseq.csv", ), ], ids=["non file-based", "file-based"], - indirect=["temporary_file_copy"] + indirect=["temporary_file_copy"], ) def test_annotation_submission( self, @@ -484,11 +480,16 @@ def test_get_files_metadata_from_dataset(self, synapse_store): "entityId": ["syn123", "syn456"], } - async def test_get_async_annotation(self, synapse_store): + async def test_get_async_annotation(self, synapse_store: SynapseStorage) -> None: + """test get annotation async function""" mock_syn_id = "syn1234" - with patch("schematic.store.synapse.get_entity_id_bundle2", new_callable=AsyncMock, return_value="mock") as mock_get_entity_id_bundle2: - mock_get_entity_id_bundle2.return_value="mock" + with patch( + "schematic.store.synapse.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value="mock", + ) as mock_get_entity_id_bundle2: + mock_get_entity_id_bundle2.return_value = "mock" result = await synapse_store.get_async_annotation(synapse_id=mock_syn_id) mock_get_entity_id_bundle2.assert_called_once_with( @@ -498,6 +499,46 @@ async def test_get_async_annotation(self, synapse_store): ) assert result == "mock" + async def test_store_async_annotation(self, synapse_store: SynapseStorage) -> None: + """test store annotations async function""" + annos_dict = { + "annotations": { + "id": "mock_syn_id", + "etag": "mock etag", + "annotations": { + "Id": {"type": "STRING", "value": ["mock value"]}, + "EntityId": {"type": "STRING", "value": ["mock_syn_id"]}, + "SampleID": {"type": "STRING", "value": [""]}, + "Component": {"type": "STRING", "value": ["mock value"]}, + }, + }, + "FileFormat": "mock format", + "Component": "mock component", + "Id": "mock_string", + "EntityId": "mock_id", + } + expected_dict = Annotations( + annotations={ + "Id": ["mock_string"], + "EntityId": ["mock_syn_id"], + "SampleID": [""], + "Component": ["mock value"], + "FileFormat": ["mock_format"], + }, + etag="mock etag", + id="mock syn_id", + ) + + with patch( + "schematic.store.synapse.Annotations.store_async", + new_callable=AsyncMock, + return_value=expected_dict, + ) as mock_store_async: + result = await synapse_store.store_async_annotation(annos_dict) + + mock_store_async.assert_called_once_with(synapse_store.syn) + assert isinstance(result, Annotations) + class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): From ac18c76f8223e7cd461399c68990a7d0cf93e464 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 14 Jun 2024 15:20:51 -0400 Subject: [PATCH 17/23] add assert statement --- tests/test_store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_store.py b/tests/test_store.py index 29e91331b..da193282d 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -537,6 +537,7 @@ async def test_store_async_annotation(self, synapse_store: SynapseStorage) -> No result = await synapse_store.store_async_annotation(annos_dict) mock_store_async.assert_called_once_with(synapse_store.syn) + assert result == expected_dict assert isinstance(result, Annotations) From 1ae73359762d51004179ecbcd4340a02f52b0a84 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 14 Jun 2024 15:48:40 -0400 Subject: [PATCH 18/23] rename and add typing --- schematic/store/synapse.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 91aa602bb..3c1137da0 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -24,7 +24,7 @@ from time import sleep # allows specifying explicit variable types -from typing import Dict, List, Tuple, Sequence, Union, Optional, Any +from typing import Dict, List, Tuple, Sequence, Union, Optional, Any, Set from synapseclient import ( Synapse, @@ -1692,7 +1692,15 @@ def _create_entity_id(self, idx, row, manifest, datasetId): manifest.loc[idx, "entityId"] = entityId return manifest, entityId - async def _store_annos(self, requests): + async def _process_store_annos(self, requests: Set[asyncio.Task]) -> None: + """Process annotations and store them on synapse asynchronously + + Args: + requests (Set[asyncio.Task]): a set of tasks of formatting annotations created by format_row_annotations function in previous step + + Raises: + RuntimeError: raise a run time error if a task failed to complete + """ while requests: done_tasks, pending_tasks = await asyncio.wait( requests, return_when=asyncio.FIRST_COMPLETED @@ -1790,7 +1798,7 @@ async def add_annotations_to_entities_files( ) ) requests.add(annos_task) - await self._store_annos(requests) + await self._process_store_annos(requests) return manifest def upload_manifest_as_table( From c8a976bd0578150d78b0fbab5a4776847f113f8e Mon Sep 17 00:00:00 2001 From: linglp Date: Sat, 15 Jun 2024 18:04:15 -0400 Subject: [PATCH 19/23] add test of _add_annos and edit comment --- schematic/store/synapse.py | 1 - tests/test_store.py | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 3c1137da0..e6fcac7af 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1716,7 +1716,6 @@ async def _process_store_annos(self, requests: Set[asyncio.Task]) -> None: entity_id = annos_dict["id"] logger.info(f"Successfully stored annotations for {entity_id}") else: - # remove special characters in annotations entity_id = annos["EntityId"] logger.info( f"Obtained and processed annotations for {entity_id} entity" diff --git a/tests/test_store.py b/tests/test_store.py index da193282d..def4b6d3b 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -9,6 +9,7 @@ from typing import Generator, Any from unittest.mock import patch import shutil +import asyncio import pandas as pd import pytest @@ -541,6 +542,89 @@ async def test_store_async_annotation(self, synapse_store: SynapseStorage) -> No assert isinstance(result, Annotations) + async def test_process_store_annos_failure(self, synapse_store: SynapseStorage) -> None: + """test _process_store_annos function when there's an error either getting or storing annotations + """ + async def mock_failure_coro(): + await asyncio.sleep(0.1) + raise ValueError("sample error") + + # create tasks that will fail + tasks = set() + tasks.add(asyncio.create_task(mock_failure_coro())) + + synapse_store._process_store_annos + # make sure error message can be raised + with pytest.raises(RuntimeError, match="failed with"): + await synapse_store._process_store_annos(tasks) + + async def test_process_store_annos_success_store(self, synapse_store: SynapseStorage) -> None: + """test _process_store_annos function and make sure that annotations can be stored after successfully getting annotations. + """ + # mock annotation obtained after async_store + stored_annos = Annotations( + annotations={ + "Id": ["mock_string"], + "EntityId": ["mock_syn_id"], + "SampleID": [""], + "Component": ["mock value"], + "FileFormat": ["mock_format"], + }, + etag="mock etag", + id="mock_syn_id") + + async def mock_success_coro(): + await asyncio.sleep(0.1) + return stored_annos + + with patch("schematic.store.synapse.SynapseStorage.store_async_annotation",new_callable=AsyncMock) as mock_store_async1: + tasks = set() + tasks.add(asyncio.create_task(mock_success_coro())) + await synapse_store._process_store_annos(tasks) + # make sure that the if statement is working + mock_store_async1.assert_not_called() + + + async def test_process_store_annos_success_get(self, synapse_store: SynapseStorage) -> None: + """test _process_store_annos function and make sure that task of storing annotations can be triggered + """ + # mock annotation obtained after get_async + mock_annos_dict = { + "annotations": { + "id": "mock_syn_id", + "etag": "mock etag", + "annotations": { + "Id": {"type": "STRING", "value": ["mock value"]}, + "EntityId": {"type": "STRING", "value": ["mock_syn_id"]}, + "SampleID": {"type": "STRING", "value": [""]}, + "Component": {"type": "STRING", "value": ["mock value"]}, + }, + }, + "FileFormat": "mock format", + "Component": "mock component", + "Id": "mock_string", + "EntityId": "mock_id", + } + + mock_stored_annos = Annotations( + annotations={ + "Id": ["mock_string"], + "EntityId": ["mock_syn_id"], + }, + etag="mock etag", + id="mock_syn_id") + + async def mock_success_coro(): + await asyncio.sleep(0.1) + return mock_annos_dict + + # make sure that the else statement is working + new_tasks = set() + with patch("schematic.store.synapse.SynapseStorage.store_async_annotation",new_callable=AsyncMock, return_value=mock_stored_annos) as mock_store_async2: + new_tasks.add(asyncio.create_task(mock_success_coro())) + await synapse_store._process_store_annos(new_tasks) + mock_store_async2.assert_called_once() + class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): assert dataset_fileview.datasetId == dataset_id From 4addbab81d7b0efac7d9f53abb9d0eae071785cc Mon Sep 17 00:00:00 2001 From: linglp Date: Sat, 15 Jun 2024 22:04:05 -0400 Subject: [PATCH 20/23] fix test --- tests/test_store.py | 47 +++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index def4b6d3b..d4adb36d1 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1077,7 +1077,7 @@ class TestManifestUpload: ), ], ) - def test_add_annotations_to_entities_files( + async def test_add_annotations_to_entities_files( self, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, @@ -1097,27 +1097,40 @@ def test_add_annotations_to_entities_files( expected_filenames (list(str)): expected list of file names expected_entity_ids (list(str)): expected list of entity ids """ + async def mock_format_row_annos(): + await asyncio.sleep(0.1) + + async def mock_process_store_annos(requests): + await asyncio.sleep(0.1) + with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", return_value=files_in_dataset, ): - manifest_df = pd.DataFrame(original_manifest) + with patch('schematic.store.synapse.SynapseStorage.format_row_annotations', return_value=mock_format_row_annos, new_callable=AsyncMock) as mock_format_row: + with patch('schematic.store.synapse.SynapseStorage._process_store_annos', return_value=mock_process_store_annos, new_callable=AsyncMock) as mock_process_store: + manifest_df = pd.DataFrame(original_manifest) + + new_df = await synapse_store.add_annotations_to_entities_files( + dmge, + manifest_df, + manifest_record_type="entity", + datasetId="mock id", + hideBlanks=True, + ) - new_df = synapse_store.add_annotations_to_entities_files( - dmge, - manifest_df, - manifest_record_type="entity", - datasetId="mock id", - hideBlanks=True, - ) - file_names_lst = new_df["Filename"].tolist() - entity_ids_lst = new_df["entityId"].tolist() - - # test entityId and Id columns get added - assert "entityId" in new_df.columns - assert "Id" in new_df.columns - assert file_names_lst == expected_filenames - assert entity_ids_lst == expected_entity_ids + file_names_lst = new_df["Filename"].tolist() + entity_ids_lst = new_df["entityId"].tolist() + + # test entityId and Id columns get added + assert "entityId" in new_df.columns + assert "Id" in new_df.columns + assert file_names_lst == expected_filenames + assert entity_ids_lst == expected_entity_ids + + # make sure async function gets called as expected + assert mock_format_row.call_count == len(expected_entity_ids) + assert mock_process_store.call_count == 1 @pytest.mark.parametrize( "mock_manifest_file_path", From ff33bbfe719fd94be239bcbdebf38d60fc1280b2 Mon Sep 17 00:00:00 2001 From: linglp Date: Sat, 15 Jun 2024 22:35:40 -0400 Subject: [PATCH 21/23] remove add_annotation function --- schematic/store/synapse.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index e6fcac7af..84ebb0696 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1643,36 +1643,6 @@ def _generate_table_name(self, manifest): table_name = "synapse_storage_manifest_table" return table_name, component_name - def _add_annotations( - self, - dmge, - row, - entityId: str, - hideBlanks: bool, - annotation_keys: str, - ): - """Helper function to format and add annotations to entities in Synapse. - Args: - dmge: DataModelGraphExplorer object, - row: current row of manifest being processed - entityId (str): synapseId of entity to add annotations to - hideBlanks: Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false. - annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display - name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain - display label formatting while ensuring the label is formatted properly for Synapse annotations. - Returns: - Annotations are added to entities in Synapse, no return. - """ - # Format annotations for Synapse - annos = self.format_row_annotations( - dmge, row, entityId, hideBlanks, annotation_keys - ) - - if annos: - # Store annotations for an entity folder - self.syn.set_annotations(annos) - return - def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. Args: From 29a00a2b1fc244404741f13f06c4182a06833474 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 18 Jun 2024 11:42:44 -0400 Subject: [PATCH 22/23] remove sleep in test --- tests/test_store.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index d4adb36d1..98d11fd48 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -546,8 +546,7 @@ async def test_process_store_annos_failure(self, synapse_store: SynapseStorage) """test _process_store_annos function when there's an error either getting or storing annotations """ async def mock_failure_coro(): - await asyncio.sleep(0.1) - raise ValueError("sample error") + raise ValueError("sample error") # create tasks that will fail tasks = set() @@ -572,9 +571,8 @@ async def test_process_store_annos_success_store(self, synapse_store: SynapseSto }, etag="mock etag", id="mock_syn_id") - + async def mock_success_coro(): - await asyncio.sleep(0.1) return stored_annos with patch("schematic.store.synapse.SynapseStorage.store_async_annotation",new_callable=AsyncMock) as mock_store_async1: @@ -615,7 +613,6 @@ async def test_process_store_annos_success_get(self, synapse_store: SynapseStora id="mock_syn_id") async def mock_success_coro(): - await asyncio.sleep(0.1) return mock_annos_dict # make sure that the else statement is working @@ -1098,10 +1095,10 @@ async def test_add_annotations_to_entities_files( expected_entity_ids (list(str)): expected list of entity ids """ async def mock_format_row_annos(): - await asyncio.sleep(0.1) + return async def mock_process_store_annos(requests): - await asyncio.sleep(0.1) + return with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", From adc4c012a7aa1bf311a0280ef9eaaf8d83d01ff4 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 21 Jun 2024 09:31:57 -0400 Subject: [PATCH 23/23] fix test --- tests/test_store.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 79b1e8bd7..cd5f4385b 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1232,9 +1232,14 @@ def test_upload_manifest_as_csv( hide_blanks: bool, restrict: bool, ) -> None: + async def mock_add_annotations_to_entities_files(): + return + with ( patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", + return_value=mock_add_annotations_to_entities_files, + new_callable=AsyncMock, ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file", @@ -1282,13 +1287,19 @@ def test_upload_manifest_as_table( manifest_record_type: str, ) -> None: mock_df = pd.DataFrame() + + async def mock_add_annotations_to_entities_files(): + return + with ( patch( "schematic.store.synapse.SynapseStorage.uploadDB", return_value=["mock_table_id", mock_df, "mock_table_manifest"], ) as update_db_mock, patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", + return_value=mock_add_annotations_to_entities_files, + new_callable=AsyncMock, ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file", @@ -1342,13 +1353,19 @@ def test_upload_manifest_combo( mock_df = pd.DataFrame() manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") manifest_df = helpers.get_data_frame(manifest_path) + + async def mock_add_annotations_to_entities_files(): + return + with ( patch( "schematic.store.synapse.SynapseStorage.uploadDB", return_value=["mock_table_id", mock_df, "mock_table_manifest"], ) as update_db_mock, patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", + return_value=mock_add_annotations_to_entities_files, + new_callable=AsyncMock, ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file",