Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
znatty22 committed Jan 23, 2024
1 parent ed674ea commit 9200f36
Show file tree
Hide file tree
Showing 3 changed files with 329 additions and 11 deletions.
306 changes: 306 additions & 0 deletions dataservice/api/biospecimen/manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
"""
Module to help manage the create/update lifecycle of Samples and Containers
The main function in this module is `manage_sample_containers`, which is
responsible for creating/updating Samples and Containers every time a
Biospecimen is created or updated. It gets called in
dataservice/api/biospecimen/resources.py
Background:
The current Biospecimen table does not adequately model the hierarchical
relationship between specimen groups and specimens. The Sample and
Container tables have been created to fill in this gap.
A Sample is a biologically equivalent group of specimens. A Sample has
one or more Containers and a Container essentially mirrors the Biospecimen.
The Sample and Container tables were created in order to minimize any
changes to the existing Biospecimen table.
"""

from marshmallow import ValidationError
from flask import abort
from dataservice.extensions import db
from dataservice.api.sample.models import Sample
from dataservice.api.container.models import Container
from dataservice.api.sample.schemas import (
SampleSchema,
)
from dataservice.api.container.schemas import (
ContainerSchema,
)


def _get_visibility_params(biospecimen):
    """
    Collect the Biospecimen's visibility settings into a dict

    The returned dict has the keys `visible`, `visibility_reason` and
    `visibility_comment`, each holding the value of the Biospecimen
    attribute of the same name.
    """
    visibility_fields = ("visible", "visibility_reason", "visibility_comment")
    return {name: getattr(biospecimen, name) for name in visibility_fields}


def _create_sample_event_key(biospecimen):
    """
    Create a sample event identifier from specific fields on the Biospecimen

    Use:
        participant_id
        external_sample_id
        age_at_event_days

    Key format: <participant_id>-<external_sample_id>-<age_at_event_days>

    Any component that is missing (None or an empty string) is replaced by
    the value "Not Reported". An age_at_event_days of 0 is a real value and
    is kept as "0" rather than being treated as missing.

    NOTE(review): keys previously built for a Biospecimen with an age of 0
    would have ended in "Not Reported" - confirm whether any stored Samples
    need their sample_event_key regenerated.

    :param biospecimen: object exposing the three attributes listed above
    :returns: the sample event key string
    """
    components = (
        biospecimen.participant_id,
        biospecimen.external_sample_id,
        biospecimen.age_at_event_days,
    )

    # Test explicitly for "missing" instead of relying on truthiness so that
    # a numeric 0 is not mistaken for an unreported value
    return "-".join(
        "Not Reported" if c is None or c == "" else str(c)
        for c in components
    )


def _get_sample_identifier(biospecimen):
    """
    Build the dict of attributes that together uniquely identify a Sample

    The dict holds the generated `sample_event_key` plus a fixed set of
    Sample attributes copied from the matching Biospecimen attributes.
    """
    identifier = {"sample_event_key": _create_sample_event_key(biospecimen)}

    # (Sample attribute, Biospecimen attribute it is read from)
    attribute_map = (
        ("composition", "composition"),
        ("tissue_type", "source_text_tissue_type"),
        ("analyte_type", "analyte_type"),
        ("anatomical_location", "source_text_anatomical_site"),
        ("method_of_sample_procurement", "method_of_sample_procurement"),
        ("preservation_method", "preservation_method"),
        ("concentration_mg_per_ml", "concentration_mg_per_ml"),
    )
    for sample_attr, biospecimen_attr in attribute_map:
        identifier[sample_attr] = getattr(biospecimen, biospecimen_attr)

    return identifier


def _get_container_identifier(biospecimen):
    """
    Build the dict of attributes that uniquely identify a Container

    A Container is identified solely by the kf_id of its Biospecimen,
    stored under the `biospecimen_id` key.
    """
    return dict(biospecimen_id=biospecimen.kf_id)


def _create_sample(biospecimen):
    """
    Build a new, validated Sample from the Biospecimen's attributes

    Aborts the request with a 400 if the SampleSchema rejects the parameters
    """
    sample_params = {
        # Attributes that uniquely identify the sample
        **_get_sample_identifier(biospecimen),
        # Remaining sample attributes
        "participant_id": biospecimen.participant_id,
        "external_id": biospecimen.external_sample_id,
        "volume_ul": biospecimen.volume_ul,
        # The Biospecimen stands for both the sample and its containers, so
        # the sample takes on the Biospecimen's visibility
        **_get_visibility_params(biospecimen),
    }

    # Deserialize and validate in one step
    schema = SampleSchema(strict=True)
    try:
        sample = schema.load(sample_params).data
    except ValidationError as e:
        abort(400, 'could not create sample: {}'.format(e.messages))

    return sample


def _update_sample(current_sample, biospecimen):
    """
    Apply the Biospecimen's attributes onto an existing Sample and validate

    The parameters are loaded into `current_sample` as a partial update.
    Aborts the request with a 400 if the SampleSchema rejects the parameters
    """
    sample_params = {
        # Attributes that uniquely identify the sample
        **_get_sample_identifier(biospecimen),
        # Remaining sample attributes
        "participant_id": biospecimen.participant_id,
        "external_id": biospecimen.external_sample_id,
        "volume_ul": biospecimen.volume_ul,
        # The Biospecimen stands for both the sample and its containers, so
        # the sample takes on the Biospecimen's visibility
        **_get_visibility_params(biospecimen),
    }

    schema = SampleSchema(strict=True)
    try:
        loaded = schema.load(
            sample_params, instance=current_sample, partial=True
        )
        sample = loaded.data
    except ValidationError as e:
        abort(400, 'could not update sample: {}'.format(e.messages))

    return sample


def _create_container(biospecimen, sample):
    """
    Build a new, validated Container from the Biospecimen's attributes

    The Container is linked to the Biospecimen and to the given Sample
    through their kf_ids.
    Aborts the request with a 400 if the ContainerSchema rejects the
    parameters
    """
    container_params = {
        # Attributes that uniquely identify the container
        **_get_container_identifier(biospecimen),
        # Remaining container attributes, including the links to the
        # biospecimen and the sample
        "biospecimen_id": biospecimen.kf_id,
        "sample_id": sample.kf_id,
        "volume_ul": biospecimen.volume_ul,
        "external_id": biospecimen.external_aliquot_id,
        # The Biospecimen stands for both the sample and its containers, so
        # the container takes on the Biospecimen's visibility
        **_get_visibility_params(biospecimen),
    }

    schema = ContainerSchema(strict=True)
    try:
        container = schema.load(container_params).data
    except ValidationError as e:
        abort(400, 'could not create container: {}'.format(e.messages))

    return container


def _update_container(current_container, biospecimen, sample):
    """
    Apply the Biospecimen's attributes onto an existing Container and validate

    The Container is (re)linked to the Biospecimen and to the given Sample
    through their kf_ids. The parameters are loaded into `current_container`
    as a partial update.
    Aborts the request with a 400 if the ContainerSchema rejects the
    parameters
    """
    container_params = {
        # Attributes that uniquely identify the container
        **_get_container_identifier(biospecimen),
        # Remaining container attributes, including the links to the
        # biospecimen and the sample
        "biospecimen_id": biospecimen.kf_id,
        "sample_id": sample.kf_id,
        "volume_ul": biospecimen.volume_ul,
        "external_id": biospecimen.external_aliquot_id,
        # The Biospecimen stands for both the sample and its containers, so
        # the container takes on the Biospecimen's visibility
        **_get_visibility_params(biospecimen),
    }

    schema = ContainerSchema(strict=True)
    try:
        loaded = schema.load(
            container_params, instance=current_container, partial=True
        )
        container = loaded.data
    except ValidationError as e:
        abort(400, 'could not update container: {}'.format(e.messages))

    return container


def _upsert_sample(biospecimen):
    """
    Upsert the Sample that corresponds to the Biospecimen

    Look up an existing Sample by the Biospecimen attributes that uniquely
    identify a sample:
        - found: update it from the Biospecimen attributes
        - not found: create a new Sample from the Biospecimen attributes

    The resulting Sample is added to the session, committed and returned.
    """
    identifier = _get_sample_identifier(biospecimen)
    existing = Sample.query.filter_by(**identifier).first()

    if existing:
        # Sample exists, update it
        sample = _update_sample(existing, biospecimen)
        print(f"Updated sample {sample.external_id}!")
    else:
        # Sample does not exist, create it
        sample = _create_sample(biospecimen)
        print(f"Created new sample {sample.external_id}!")

    db.session.add(sample)
    db.session.commit()

    return sample


def _upsert_container(biospecimen, sample):
    """
    Upsert Container from specific Biospecimen attributes and link Container
    to its associated Sample

    Try to find existing Container first
    If it exists, update it using the Biospecimen attributes
    If it does not exist, create Container using the Biospecimen attributes

    The resulting Container is added to the session, committed and returned.

    :param biospecimen: the Biospecimen the Container mirrors
    :param sample: the Sample the Container is linked to
    :returns: the created or updated Container
    """
    # Extract biospecimen attributes that uniquely identify a container
    container_query_params = _get_container_identifier(biospecimen)

    # Find container if it exists
    container = Container.query.filter_by(**container_query_params).first()

    # Container does not exist - create it
    if not container:
        container = _create_container(biospecimen, sample)
        print(f"Created new container {container.external_id}!")
    # Container exists - update it
    else:
        # Report only after the update has succeeded so the message shows
        # the container's current external_id and is not printed when
        # validation aborts the request (mirrors _upsert_sample)
        container = _update_container(container, biospecimen, sample)
        print(f"Updated container {container.external_id}!")

    db.session.add(container)
    db.session.commit()

    return container


def _update_sample_volume(sample_id):
    """
    Set the Sample's volume to the sum of its containers' volumes

    Containers without a volume are left out of the sum. If none of the
    containers has a volume, the Sample's volume is set to None. The change
    is committed.
    """
    sample = Sample.query.get(sample_id)

    # Only containers that actually report a volume contribute to the total
    reported_volumes = [
        container.volume_ul
        for container in sample.containers
        if container.volume_ul is not None
    ]
    if reported_volumes:
        # Seed the sum with the first volume and add the rest onto it
        total_volume = sum(reported_volumes[1:], reported_volumes[0])
    else:
        total_volume = None

    sample.volume_ul = total_volume

    print(f"Sample volume updated: {total_volume}")

    db.session.add(sample)
    db.session.commit()


def manage_sample_containers(biospecimen):
    """
    Upsert a Sample and Container from the input Biospecimen
    Update the sample's volume with the sum of the container volumes

    :param biospecimen: the Biospecimen that was just created or updated
    :returns: the Sample that was upserted for the Biospecimen
    """
    sample = _upsert_sample(biospecimen)
    # The container is persisted by the upsert itself; its return value is
    # not needed here
    _upsert_container(biospecimen, sample)
    # Recompute once the container is stored so its volume is included
    _update_sample_volume(sample.kf_id)

    return sample
3 changes: 3 additions & 0 deletions dataservice/api/biospecimen/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataservice.api.common.model import Base, KfId
from dataservice.api.biospecimen_genomic_file.models import (
BiospecimenGenomicFile)
from dataservice.api.sample.models import Sample
from dataservice.api.container.models import Container
from dataservice.api.diagnosis.models import Diagnosis
from sqlalchemy.ext.associationproxy import association_proxy
Expand Down Expand Up @@ -215,3 +216,5 @@ def biospecimen_or_diagnosis_on_insert(mapper, connection, target):
Run preprocessing/validation of diagnosis before insert
"""
validate_diagnosis_biospecimen(target)


31 changes: 20 additions & 11 deletions dataservice/api/biospecimen/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from dataservice.api.common.views import CRUDView
from dataservice.api.common.schemas import filter_schema_factory
from dataservice.api.biospecimen import manager


class BiospecimenListAPI(CRUDView):
Expand Down Expand Up @@ -92,17 +93,21 @@ def post(self):

# Deserialize
try:
s = BiospecimenSchema(strict=True).load(body).data
biospecimen = BiospecimenSchema(strict=True).load(body).data
# Request body not valid
except ValidationError as e:
abort(400, 'could not create biospecimen: {}'.format(e.messages))

# Add to and save in database
db.session.add(s)
db.session.add(biospecimen)
db.session.commit()

return BiospecimenSchema(201, 'biospecimen {} created'
.format(s.kf_id)).jsonify(s), 201
# Create the Biospecimen's associated Sample and Container
manager.manage_sample_containers(biospecimen)

return BiospecimenSchema(
201, 'biospecimen {} created'.format(biospecimen.kf_id)
).jsonify(biospecimen), 201


class BiospecimenAPI(CRUDView):
Expand Down Expand Up @@ -141,25 +146,29 @@ def patch(self, kf_id):
resource:
Biospecimen
"""
sa = Biospecimen.query.get(kf_id)
if sa is None:
biospecimen = Biospecimen.query.get(kf_id)
if biospecimen is None:
abort(404, 'could not find {} `{}`'
.format('biospecimen', kf_id))

# Partial update - validate but allow missing required fields
body = request.get_json(force=True) or {}
try:
sa = BiospecimenSchema(strict=True).load(body, instance=sa,
partial=True).data
biospecimen = BiospecimenSchema(strict=True).load(
body, instance=biospecimen, partial=True
).data
except ValidationError as err:
abort(400, 'could not update biospecimen: {}'.format(err.messages))

db.session.add(sa)
db.session.add(biospecimen)
db.session.commit()

# Create the Biospecimen's associated Sample and Container
manager.manage_sample_containers(biospecimen)

return BiospecimenSchema(
200, 'biospecimen {} updated'.format(sa.kf_id)
).jsonify(sa), 200
200, 'biospecimen {} updated'.format(biospecimen.kf_id)
).jsonify(biospecimen), 200

def delete(self, kf_id):
"""
Expand Down

0 comments on commit 9200f36

Please sign in to comment.