Skip to content

Commit 11aa341

Browse files
committed
Added toolkits for common microservices, upgraded build to use 'uv'
Updated toolkits (and typehints) for the following services: * workflow manager * metadata manager * file manager * fastq manager Also updated the dynamodb partition table to use pointInTimeRecoverySpecification since pointInTimeRecovery parameter is deprecated
1 parent 97dfd0f commit 11aa341

File tree

44 files changed

+2620
-156
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+2620
-156
lines changed

lib/workload/components/dynamodb-partitioned-table/index.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ export class DynamodbPartitionedPipelineConstruct extends Construct {
2727
},
2828
tableName: props.tableName,
2929
removalPolicy: props.removalPolicy || RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE,
30-
pointInTimeRecovery: true,
30+
pointInTimeRecoverySpecification: {
31+
pointInTimeRecoveryEnabled: true,
32+
},
3133
});
3234

3335
// Set outputs
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[build-system]
2+
requires = ["poetry-core>=1.0.0"]
3+
build-backend = "poetry.core.masonry.api"
4+
5+
[tool.poetry]
6+
name = "fastq_tools"
7+
version = "0.0.1"
8+
description = "Fastq Tools Lambda Layers"
9+
license = "GPL-3.0-or-later"
10+
authors = [
11+
"Alexis Lucattini"
12+
]
13+
homepage = "https://github.com/umccr/orcabus"
14+
repository = "https://github.com/umccr/orcabus"
15+
16+
[tool.poetry.dependencies]
17+
python = "^3.12, <3.13"
18+
requests = "^2.32.3"
19+
20+
[tool.poetry.group.dev]
21+
optional = true
22+
23+
[tool.poetry.group.dev.dependencies]
24+
pytest = "^7.0.0" # For testing only
25+
# For typehinting only, not required at runtime
26+
mypy-boto3-ssm = "^1.34"
27+
mypy-boto3-secretsmanager = "^1.34"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Fastq tools to be used by various lambdas as needed
5+
"""
6+
from .utils.models import(
7+
FastqListRow,
8+
FastqStorageObject,
9+
FileStorageObject,
10+
FastqSet,
11+
)
12+
13+
from .utils.query_helpers import (
14+
get_fastq,
15+
get_fastqs,
16+
get_fastqs_in_instrument_run_id,
17+
get_fastqs_in_library,
18+
get_fastqs_in_library_list,
19+
get_fastqs_in_libraries_and_instrument_run_id,
20+
get_fastqs_in_sample,
21+
get_fastqs_in_subject,
22+
get_fastqs_in_individual,
23+
get_fastqs_in_project,
24+
get_fastq_set
25+
)
26+
27+
28+
from .utils.update_helpers import (
29+
add_qc_stats,
30+
add_read_count,
31+
add_file_compression_information,
32+
add_ntsm_storage_object,
33+
add_read_set,
34+
detach_read_set,
35+
validate_fastq,
36+
invalidate_fastq
37+
)
38+
39+
from .utils.workflow_helpers import (
40+
to_cwl
41+
)
42+
43+
44+
# Public API of the fastq_tools package, grouped by the helper module
# each name is imported from.
__all__ = [
    # --- utils.models ---
    "FastqListRow",
    "FastqStorageObject",
    "FileStorageObject",
    "FastqSet",

    # --- utils.query_helpers (fastq queries) ---
    "get_fastq",
    "get_fastqs",
    "get_fastqs_in_instrument_run_id",
    "get_fastqs_in_library",
    "get_fastqs_in_library_list",
    "get_fastqs_in_libraries_and_instrument_run_id",
    "get_fastqs_in_sample",
    "get_fastqs_in_subject",
    "get_fastqs_in_individual",
    "get_fastqs_in_project",

    # --- utils.query_helpers (fastq set queries) ---
    "get_fastq_set",

    # --- utils.update_helpers ---
    "add_qc_stats",
    "add_read_count",
    "add_file_compression_information",
    "add_ntsm_storage_object",
    "add_read_set",
    "detach_read_set",
    "validate_fastq",
    "invalidate_fastq",

    # --- utils.workflow_helpers ---
    "to_cwl",
]

lib/workload/components/python-fastq-tools-layer/fastq_tools_layer/src/fastq_tools/utils/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/env python3
2+
3+
# Standard imports
4+
import typing
5+
import boto3
6+
import json
7+
from os import environ
8+
9+
10+
# Type hinting
11+
if typing.TYPE_CHECKING:
12+
from mypy_boto3_secretsmanager import SecretsManagerClient
13+
from mypy_boto3_ssm import SSMClient
14+
15+
16+
def get_secretsmanager_client() -> 'SecretsManagerClient':
    """
    Create a boto3 client for the 'secretsmanager' service.

    :return: The client object returned by boto3.client
    """
    secretsmanager_client = boto3.client('secretsmanager')
    return secretsmanager_client
18+
19+
20+
def get_ssm_client() -> 'SSMClient':
    """
    Create a boto3 client for the 'ssm' service.

    :return: The client object returned by boto3.client
    """
    ssm_client = boto3.client('ssm')
    return ssm_client
22+
23+
24+
def get_secret_value(secret_id) -> str:
    """
    Fetch a secret through the Secrets Manager client and return its string payload.

    :param secret_id: Passed to the client's get_secret_value call as SecretId
    :return: The 'SecretString' entry of the response
    """
    client = get_secretsmanager_client()
    response = client.get_secret_value(SecretId=secret_id)
    return response['SecretString']
34+
35+
36+
def get_ssm_value(parameter_name) -> str:
    """
    Fetch a parameter through the SSM client and return its value.

    :param parameter_name: Passed to the client's get_parameter call as Name
    :return: The ['Parameter']['Value'] entry of the response
    """
    client = get_ssm_client()
    response = client.get_parameter(Name=parameter_name)
    parameter = response['Parameter']
    return parameter['Value']
41+
42+
43+
def get_orcabus_token() -> str:
    """
    From the AWS Secrets Manager, retrieve the OrcaBus token.

    The secret id is read from the ORCABUS_TOKEN_SECRET_ID environment
    variable. The secret string is parsed as JSON and the value stored
    under its 'id_token' key is returned.

    :return: The 'id_token' value of the secret
    :raises EnvironmentError: If ORCABUS_TOKEN_SECRET_ID is unset or empty
    """
    secret_id = environ.get("ORCABUS_TOKEN_SECRET_ID")

    # Fail early with a clear message instead of handing None to boto3,
    # which would otherwise surface as an opaque parameter validation error
    if not secret_id:
        raise EnvironmentError(
            "Environment variable 'ORCABUS_TOKEN_SECRET_ID' is not set"
        )

    return json.loads(get_secret_value(secret_id))['id_token']
49+
50+
51+
def get_hostname() -> str:
    """
    Retrieve the hostname from the SSM parameter store.

    The parameter name is read from the HOSTNAME_SSM_PARAMETER environment
    variable and its value is looked up via get_ssm_value.

    :return: The value of the SSM parameter
    :raises EnvironmentError: If HOSTNAME_SSM_PARAMETER is unset or empty
    """
    parameter_name = environ.get("HOSTNAME_SSM_PARAMETER")

    # Fail early with a clear message instead of handing None to boto3,
    # which would otherwise surface as an opaque parameter validation error
    if not parameter_name:
        raise EnvironmentError(
            "Environment variable 'HOSTNAME_SSM_PARAMETER' is not set"
        )

    return get_ssm_value(parameter_name)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/usr/bin/env python3
2+
3+
import re
4+
5+
# AWS PARAMETERS
6+
# Subdomain name used for the fastq service
FASTQ_SUBDOMAIN_NAME = "fastq"

# API endpoint paths (relative, no leading slash)
FASTQ_LIST_ROW_ENDPOINT = "api/v1/fastq"
FASTQ_SET_ENDPOINT = "api/v1/fastqSet"

# Pre-compiled pattern: three lowercase-alphanumeric characters, a dot,
# then 26 uppercase-alphanumeric characters (e.g. 'fqr.01JJY7P1AVFGHGVMEDE8T4VWJG')
ORCABUS_ULID_REGEX_MATCH = re.compile(
    r'^[a-z0-9]{3}\.[A-Z0-9]{26}$'
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
{
5+
"id": "fqr.01JJY7P1AVFGHGVMEDE8T4VWJG",
6+
"rgid": "ATCCACTG+ACGCACCT.2",
7+
"index": "ATCCACTG",
8+
"index2": "ACGCACCT",
9+
"lane": 2,
10+
"instrumentRunId": "230223_A00130_0244_AHN3W3DSX5",
11+
"library": {
12+
"orcabusId": "lib.01JBMVFP45C2EZRVK67P8JY1D2",
13+
"libraryId": "L2300223"
14+
},
15+
"readSet": {
16+
"r1": {
17+
"s3IngestId": "019387bd-2494-7c00-9e41-03e8b6a73306",
18+
"gzipCompressionSizeInBytes": 49532847794,
19+
"rawMd5sum": "19e339fdb3c42f0133f5f3b1f9d188e0", // pragma: allowlist secret
20+
"s3Uri": "s3://archive-prod-fastq-503977275616-ap-southeast-2/v1/year=2023/month=02/230223_A00130_0244_AHN3W3DSX5/202411226f4f7af0/WGS_TsqNano/MDX230039_L2300223_S7_L002_R1_001.fastq.ora"
21+
},
22+
"r2": {
23+
"s3IngestId": "019387bd-9177-79c1-a489-d940ecc11b11",
24+
"gzipCompressionSizeInBytes": 53189277581,
25+
"rawMd5sum": "e857de35a8ca008589d24b2e0f647cc7", // pragma: allowlist secret
26+
"s3Uri": "s3://archive-prod-fastq-503977275616-ap-southeast-2/v1/year=2023/month=02/230223_A00130_0244_AHN3W3DSX5/202411226f4f7af0/WGS_TsqNano/MDX230039_L2300223_S7_L002_R2_001.fastq.ora"
27+
},
28+
"compressionFormat": "ORA"
29+
},
30+
"qc": null,
31+
"readCount": null,
32+
"baseCountEst": null,
33+
"isValid": true,
34+
"ntsm": null
35+
}
36+
"""
37+
38+
from typing import (
39+
TypedDict,
40+
Optional,
41+
Dict,
42+
List
43+
)
44+
45+
46+
class FileStorageObject(TypedDict):
    """
    Reference to a stored file: an ingest id paired with an S3 URI.
    """
    # Ingest identifier for the object (a UUID-style string in the module's example record)
    s3IngestId: str
    # Location of the object, e.g. 's3://bucket/key'
    s3Uri: str
49+
50+
51+
class FastqStorageObject(FileStorageObject):
    """
    A FileStorageObject extended with the fastq file's compression size and md5sum.
    """
    # Size in bytes of the gzip-compressed file
    gzipCompressionSizeInBytes: int
    # md5 digest as a string (hex in the module's example record)
    rawMd5sum: str
54+
55+
56+
class ReadSet(TypedDict):
    """
    The r1 / r2 fastq storage objects of a fastq, plus their compression format.
    """
    # Read 1 file
    r1: FastqStorageObject
    # Read 2 file
    r2: FastqStorageObject
    # Compression format label ('ORA' in the module's example record)
    compressionFormat: str
60+
61+
62+
class Library(TypedDict):
    """
    Library reference carrying both its orcabus id and its library id.
    """
    # e.g. 'lib.01JBMVFP45C2EZRVK67P8JY1D2'
    orcabusId: str
    # e.g. 'L2300223'
    libraryId: str
65+
66+
67+
class FastqListRow(TypedDict):
    """
    A single fastq record, shaped like the example record in this module's docstring.

    'rgid' and 'index2' are declared because the example record carries both
    and CWLDict expects them when a row is converted for workflow input.
    """
    id: str
    # Read group id, e.g. 'ATCCACTG+ACGCACCT.2'
    rgid: str
    index: str
    # Second index; typed optional to match CWLDict.index2
    index2: Optional[str]
    lane: int
    instrumentRunId: str
    library: Library
    # The remaining fields may be null in the record
    readSet: Optional[ReadSet]
    qc: Optional[Dict]
    readCount: Optional[int]
    baseCountEst: Optional[int]
    isValid: Optional[bool]
    ntsm: Optional[FileStorageObject]
79+
80+
81+
class FastqSet(TypedDict):
    """
    A set of fastq list rows grouped under one library.
    """
    id: str
    library: Library
    # Member fastq records of this set
    fastqSet: List[FastqListRow]
    # Boolean flags carried on the set
    allowAdditionalFastq: bool
    isCurrentFastqSet: bool
87+
88+
89+
class QcStats(TypedDict):
    """
    QC statistics: integer estimates plus per-read Q20 and GC fractions.
    """
    # Integer estimates
    insertSizeEstimate: int
    rawWgsCoverageEstimate: int
    # Q20 fractions for read 1 / read 2
    r1Q20Fraction: float
    r2Q20Fraction: float
    # GC fractions for read 1 / read 2
    r1GcFraction: float
    r2GcFraction: float
96+
97+
98+
class ReadCount(TypedDict):
    """
    Read count together with an estimated base count.
    """
    # Number of reads
    readCount: int
    # Estimated number of bases
    baseCountEst: int
101+
102+
103+
class FileCompressionInformation(TypedDict):
    """
    Compression details for the r1 / r2 files of a read set.

    The md5sum fields are typed as strings, consistent with 'rawMd5sum: str'
    on FastqStorageObject: md5 digests are hex strings, not integers.
    """
    compressionFormat: str
    # Gzip-compressed sizes in bytes; may be None
    r1GzipCompressionSizeInBytes: Optional[int]
    r2GzipCompressionSizeInBytes: Optional[int]
    # md5 digests of the files; may be None
    r1RawMd5sum: Optional[str]
    r2RawMd5sum: Optional[str]
109+
110+
111+
# CWL file object. The functional TypedDict syntax is used because the
# key 'class' is a reserved word and cannot be declared as a class attribute.
CWLFile = TypedDict(
    'CWLFile',
    {
        'class': str,
        'location': str,
    },
)
115+
116+
117+
class CWLDict(TypedDict):
    """
    A fastq list row expressed with CWL file objects for its two reads.
    """
    # Read group id
    rgid: str
    # Index sequences; the second index may be None
    index: str
    index2: Optional[str]
    lane: int
    # CWL file objects for read 1 and read 2
    read_1: CWLFile
    read_2: CWLFile

0 commit comments

Comments
 (0)