diff --git a/src/aind_data_transfer_service/__init__.py b/src/aind_data_transfer_service/__init__.py index 0892676..d41314b 100644 --- a/src/aind_data_transfer_service/__init__.py +++ b/src/aind_data_transfer_service/__init__.py @@ -1,2 +1,9 @@ """Init package""" +import os + __version__ = "0.8.4" + +# Global constants +OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "open") +PRIVATE_BUCKET_NAME = os.getenv("PRIVATE_BUCKET_NAME", "private") +SCRATCH_BUCKET_NAME = os.getenv("SCRATCH_BUCKET_NAME", "scratch") diff --git a/src/aind_data_transfer_service/configs/job_configs.py b/src/aind_data_transfer_service/configs/job_configs.py index 776ed5e..8a5b6e7 100644 --- a/src/aind_data_transfer_service/configs/job_configs.py +++ b/src/aind_data_transfer_service/configs/job_configs.py @@ -1,5 +1,6 @@ """This module adds classes to handle resolving common endpoints used in the data transfer jobs.""" +import os import re from datetime import datetime from pathlib import PurePosixPath @@ -19,6 +20,12 @@ ) from pydantic_settings import BaseSettings +from aind_data_transfer_service import ( + OPEN_DATA_BUCKET_NAME, + PRIVATE_BUCKET_NAME, + SCRATCH_BUCKET_NAME, +) + class ModalityConfigs(BaseSettings): """Class to contain configs for each modality type""" @@ -125,10 +132,11 @@ class BasicUploadJobConfigs(BaseSettings): aws_param_store_name: Optional[str] = Field(None) - s3_bucket: str = Field( - ..., + s3_bucket: Optional[str] = Field( + None, description="Bucket where data will be uploaded", title="S3 Bucket", + validate_default=True, ) platform: Platform.ONE_OF = Field( ..., description="Platform", title="Platform" @@ -208,6 +216,19 @@ def s3_prefix(self): creation_datetime=self.acq_datetime, ) + @field_validator("s3_bucket", mode="before") + def map_bucket(cls, bucket: Optional[str]) -> Optional[str]: + """We're adding a policy that data uploaded through the service can + only land in a handful of buckets. As default, things will be + stored in the private bucket""" + if bucket is not None and bucket in [ + OPEN_DATA_BUCKET_NAME, + SCRATCH_BUCKET_NAME, + ]: + return bucket + else: + return PRIVATE_BUCKET_NAME + @field_validator("platform", mode="before") def parse_platform_string( cls, input_platform: Union[str, dict, Platform] diff --git a/src/aind_data_transfer_service/configs/job_upload_template.py b/src/aind_data_transfer_service/configs/job_upload_template.py index fc53915..867721b 100644 --- a/src/aind_data_transfer_service/configs/job_upload_template.py +++ b/src/aind_data_transfer_service/configs/job_upload_template.py @@ -18,7 +18,6 @@ class JobUploadTemplate: "platform", "acq_datetime", "subject_id", - "s3_bucket", "modality0", "modality0.source", "modality1", @@ -29,7 +28,6 @@ class JobUploadTemplate: Platform.BEHAVIOR.abbreviation, datetime.datetime(2023, 10, 4, 4, 0, 0), "123456", - "aind-behavior-data", Modality.BEHAVIOR_VIDEOS.abbreviation, "/allen/aind/stage/fake/dir", Modality.BEHAVIOR.abbreviation, @@ -39,7 +37,6 @@ class JobUploadTemplate: Platform.SMARTSPIM.abbreviation, datetime.datetime(2023, 3, 4, 16, 30, 0), "654321", - "aind-open-data", Modality.SPIM.abbreviation, "/allen/aind/stage/fake/dir", ], @@ -47,7 +44,6 @@ class JobUploadTemplate: Platform.ECEPHYS.abbreviation, datetime.datetime(2023, 1, 30, 19, 1, 0), "654321", - "aind-ephys-data", Modality.ECEPHYS.abbreviation, "/allen/aind/stage/fake/dir", Modality.BEHAVIOR_VIDEOS.abbreviation, @@ -65,16 +61,6 @@ class JobUploadTemplate: "options": [m().abbreviation for m in Modality._ALL], "ranges": ["E2:E20", "G2:G20"], }, - { - "name": "s3_bucket", - "options": [ - "aind-ephys-data", - "aind-ophys-data", - "aind-behavior-data", - "aind-private-data", - ], - "ranges": ["D2:D20"], - }, ] @staticmethod @@ -104,7 +90,7 @@ def create_job_template(): worksheet.add_data_validation(dv) # formatting bold = Font(bold=True) - for header in worksheet["A1:H1"]: + for header in worksheet["A1:G1"]: for cell in header: cell.font = bold worksheet.column_dimensions[ diff --git a/src/aind_data_transfer_service/server.py b/src/aind_data_transfer_service/server.py index 4489cff..8452e26 100644 --- a/src/aind_data_transfer_service/server.py +++ b/src/aind_data_transfer_service/server.py @@ -15,6 +15,7 @@ from starlette.applications import Starlette from starlette.routing import Route +from aind_data_transfer_service import OPEN_DATA_BUCKET_NAME from aind_data_transfer_service.configs.job_configs import ( BasicUploadJobConfigs, HpcJobConfigs, @@ -48,8 +49,6 @@ # OPEN_DATA_AWS_SECRET_ACCESS_KEY # OPEN_DATA_AWS_ACCESS_KEY_ID -OPEN_DATA_BUCKET_NAME = os.getenv("OPEN_DATA_BUCKET_NAME", "aind-open-data") - async def validate_csv(request: Request): """Validate a csv or xlsx file. Return parsed contents as json.""" diff --git a/tests/resources/job_upload_template.xlsx b/tests/resources/job_upload_template.xlsx index 14d0f79..227cf26 100644 Binary files a/tests/resources/job_upload_template.xlsx and b/tests/resources/job_upload_template.xlsx differ diff --git a/tests/resources/sample.csv b/tests/resources/sample.csv index beecb88..370494a 100644 --- a/tests/resources/sample.csv +++ b/tests/resources/sample.csv @@ -1,4 +1,4 @@ modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime ECEPHYS, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10 -BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM -BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +BEHAVIOR_VIDEOS, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +BEHAVIOR_VIDEOS, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM diff --git a/tests/resources/sample.xlsx b/tests/resources/sample.xlsx index 76b2faf..52a0fa1 100644 Binary files a/tests/resources/sample.xlsx and b/tests/resources/sample.xlsx differ diff --git a/tests/resources/sample_alt_modality_case.csv b/tests/resources/sample_alt_modality_case.csv index b23dcad..1ce7623 100644 --- a/tests/resources/sample_alt_modality_case.csv +++ b/tests/resources/sample_alt_modality_case.csv @@ -1,4 +1,4 @@ modality0, modality0.source, modality1, modality1.source, s3-bucket, subject-id, platform, acq-datetime ecephys, dir/data_set_1, ,, some_bucket, 123454, ecephys, 2020-10-10 14:10:10 -behavior-videos, dir/data_set_2, MRI, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM -behavior-videos, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, some_bucket2, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +behavior-videos, dir/data_set_2, MRI, dir/data_set_3, open, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM +behavior-videos, dir/data_set_2, BEHAVIOR_VIDEOS, dir/data_set_3, scratch, 123456, BEHAVIOR, 10/13/2020 1:10:10 PM diff --git a/tests/test_configs.py b/tests/test_configs.py index deda2c0..5767c31 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -5,6 +5,7 @@ import unittest from datetime import datetime from pathlib import Path, PurePosixPath +from unittest.mock import patch from aind_data_schema.core.processing import ProcessName from aind_data_schema.models.modalities import Modality @@ -27,7 +28,7 @@ class TestJobConfigs(unittest.TestCase): expected_job_configs = [ BasicUploadJobConfigs( aws_param_store_name="/some/param/store", - s3_bucket="some_bucket", + s3_bucket="private", platform=Platform.ECEPHYS, modalities=[ ModalityConfigs( @@ -50,7 +51,7 @@ class TestJobConfigs(unittest.TestCase): ), BasicUploadJobConfigs( aws_param_store_name="/some/param/store", - s3_bucket="some_bucket2", + s3_bucket="open", platform=Platform.BEHAVIOR, modalities=[ ModalityConfigs( @@ -80,7 +81,7 @@ class TestJobConfigs(unittest.TestCase): ), BasicUploadJobConfigs( aws_param_store_name="/some/param/store", - s3_bucket="some_bucket2", + s3_bucket="scratch", platform=Platform.BEHAVIOR, modalities=[ ModalityConfigs( @@ -308,7 +309,7 @@ def test_from_job_and_server_configs(self): " python -m aind_data_transfer.jobs.basic_job" " --json-args ' " '{"aws_param_store_name":"/some/param/store",' - '"s3_bucket":"some_bucket",' + '"s3_bucket":"private",' '"platform":{"name":"Electrophysiology platform",' '"abbreviation":"ecephys"},' '"modalities":[{"modality":' diff --git a/tests/test_server.py b/tests/test_server.py index 8cd7bde..d2a774e 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -268,7 +268,7 @@ def test_submit_hpc_jobs( { "hpc_settings": '{"qos":"production", "name": "job1"}', "upload_job_settings": ( - '{"s3_bucket": "some_bucket2", ' + '{"s3_bucket": "private", ' '"platform": {"name": "Behavior platform", ' '"abbreviation": "behavior"}, ' '"modalities": [' @@ -334,7 +334,7 @@ def test_submit_hpc_jobs_open_data( { "hpc_settings": '{"qos":"production", "name": "job1"}', "upload_job_settings": ( - '{"s3_bucket": "aind-open-data", ' + '{"s3_bucket": "open", ' '"platform": {"name": "Behavior platform", ' '"abbreviation": "behavior"}, ' '"modalities": ['