Skip to content

Commit

Permalink
Merge pull request #284 from UC-Davis-molecular-computing/283-deal-wi…
Browse files Browse the repository at this point in the history
…th-non-unique-modification-vendor-codes

fixes #283: deal with non-unique Modification vendor codes
  • Loading branch information
dave-doty authored Sep 3, 2023
2 parents fc30862 + 37fa350 commit e4cea7c
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 49 deletions.
129 changes: 94 additions & 35 deletions scadnano/scadnano.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,10 @@ def m13(rotation: int = 5587, variant: M13Variant = M13Variant.p7249) -> str:
scaffold_key = 'scaffold'
helices_view_order_key = 'helices_view_order'
is_origami_key = 'is_origami'
design_modifications_key = 'modifications_in_design'
design_modifications_key = 'modifications_in_design' # legacy key for when we stored all mods in one dict
design_modifications_5p_key = 'modifications_5p_in_design'
design_modifications_3p_key = 'modifications_3p_in_design'
design_modifications_int_key = 'modifications_int_in_design'
geometry_key = 'geometry'
groups_key = 'groups'

Expand Down Expand Up @@ -966,12 +969,22 @@ class ModificationType(enum.Enum):
five_prime = "5'"
"""5' modification type"""

three_prime = "5'"
three_prime = "3'"
"""3' modification type"""

internal = "internal"
"""internal modification type"""

def key(self) -> str:
    """
    Returns the key used in the top level of a design's JSON map for the dict holding all
    modifications of this type.

    :return:
        the value of `design_modifications_5p_key`, `design_modifications_3p_key`, or
        `design_modifications_int_key`, for the 5', 3', or internal type respectively
    :raises AssertionError:
        if this value matches none of the three known modification types
    """
    # ordered (type, key) pairs; compared with == in the same order as an if/elif chain
    keys_by_type = (
        (ModificationType.five_prime, design_modifications_5p_key),
        (ModificationType.three_prime, design_modifications_3p_key),
        (ModificationType.internal, design_modifications_int_key),
    )
    for mod_type, json_key in keys_by_type:
        if self == mod_type:
            return json_key
    raise AssertionError(f'unknown ModificationType {self}')


@dataclass(frozen=True, eq=True)
class Modification(_JSONSerializable, ABC):
Expand Down Expand Up @@ -3962,16 +3975,20 @@ def default_export_name(self, unique_names: bool = False) -> str:
name = f'{start_helix}[{start_offset}]{forward_str}{end_helix}[{end_offset}]'
return f'SCAF{name}' if self.is_scaffold else f'ST{name}'

def set_modification_5p(self, mod: Modification5Prime = None) -> None:
"""Sets 5' modification to be `mod`. `mod` cannot be non-None if :any:`Strand.circular` is True."""
if self.circular and mod is not None:
def set_modification_5p(self, mod: Modification5Prime) -> None:
    """
    Sets 5' modification to be `mod`. :any:`Strand.circular` must be False.

    :param mod:
        the 5' modification to store on this strand
    :raises StrandError:
        if this strand is circular
    :raises TypeError:
        if `mod` is not an instance of :any:`Modification5Prime`
    """
    # circularity is checked before the type of `mod`, so a circular strand always
    # reports a StrandError no matter what was passed in
    if self.circular:
        raise StrandError(self, "cannot have a 5' modification on a circular strand")
    if isinstance(mod, Modification5Prime):
        self.modification_5p = mod
        return
    raise TypeError(f'mod must be a Modification5Prime but it is type {type(mod)}: {mod}')

def set_modification_3p(self, mod: Modification3Prime = None) -> None:
"""Sets 3' modification to be `mod`. `mod` cannot be non-None if :any:`Strand.circular` is True."""
def set_modification_3p(self, mod: Modification3Prime) -> None:
    """
    Sets 3' modification to be `mod`. :any:`Strand.circular` must be False.

    :param mod:
        the 3' modification to store on this strand
    :raises StrandError:
        if this strand is circular
    :raises TypeError:
        if `mod` is not an instance of :any:`Modification3Prime`
    """
    # `mod` is no longer optional, so the circular check must not depend on `mod`;
    # a circular strand is rejected outright, exactly as in set_modification_5p
    if self.circular:
        raise StrandError(self, "cannot have a 3' modification on a circular strand")
    if not isinstance(mod, Modification3Prime):
        raise TypeError(f'mod must be a Modification3Prime but it is type {type(mod)}: {mod}')
    self.modification_3p = mod

def remove_modification_5p(self) -> None:
Expand Down Expand Up @@ -3999,6 +4016,8 @@ def set_modification_internal(self, idx: int, mod: ModificationInternal,
elif warn_on_no_dna:
print('WARNING: no DNA sequence has been assigned, so certain error checks on the internal '
'modification were not done. To be safe, first assign DNA, then add the modifications.')
if not isinstance(mod, ModificationInternal):
raise TypeError(f'mod must be a ModificationInternal but it is type {type(mod)}: {mod}')
self.modifications_int[idx] = mod

def remove_modification_internal(self, idx: int) -> None:
Expand Down Expand Up @@ -5763,10 +5782,27 @@ def from_scadnano_json_map(
strand = Strand.from_json(strand_json)
strands.append(strand)

# modifications in whole design
mods_5p: Dict[str, Modification5Prime] = {}
mods_3p: Dict[str, Modification3Prime] = {}
mods_int: Dict[str, ModificationInternal] = {}
for all_mods_key, mods in zip([design_modifications_5p_key,
design_modifications_3p_key,
design_modifications_int_key], [mods_5p, mods_3p, mods_int]):
if all_mods_key in json_map:
all_mods_json = json_map[all_mods_key]
for mod_key, mod_json in all_mods_json.items():
mod = Modification.from_json(mod_json)
if mod_key != mod.vendor_code:
print(f'WARNING: key {mod_key} does not match vendor_code field {mod.vendor_code}'
f'for modification {mod}\n'
f'replacing with key = {mod.vendor_code}')
mod = dataclasses.replace(mod, vendor_code=mod_key)
mods[mod_key] = mod

# legacy code; now we store modifications in 3 separate dicts depending on 5', 3', internal
all_mods: Dict[str, Modification] = {}
if design_modifications_key in json_map:
all_mods_json = json_map[design_modifications_key]
all_mods = {}
for mod_key, mod_json in all_mods_json.items():
mod = Modification.from_json(mod_json)
if mod_key != mod.vendor_code:
Expand All @@ -5775,7 +5811,8 @@ def from_scadnano_json_map(
f'replacing with key = {mod.vendor_code}')
mod = dataclasses.replace(mod, vendor_code=mod_key)
all_mods[mod_key] = mod
Design.assign_modifications_to_strands(strands, strand_jsons, all_mods)

Design.assign_modifications_to_strands(strands, strand_jsons, mods_5p, mods_3p, mods_int, all_mods)

geometry = None
if geometry_key in json_map:
Expand Down Expand Up @@ -5831,19 +5868,25 @@ def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> D
self.helices_view_order) if suppress_indent else self.helices_view_order

# modifications
mods = self.modifications()
if len(mods) > 0:
mods_dict = {}
for mod in mods:
if mod.vendor_code not in mods_dict:
mods_dict[mod.vendor_code] = mod.to_json_serializable(suppress_indent)
else:
if mod != mods_dict[mod.vendor_code]:
raise IllegalDesignError(f"Modifications must have unique vendor codes, but I found"
f"two different Modifications that share vendor code "
f"{mod.vendor_code}:\n{mod}\nand\n"
f"{mods_dict[mod.vendor_code]}")
dct[design_modifications_key] = mods_dict
for mod_type in [ModificationType.five_prime,
ModificationType.three_prime,
ModificationType.internal]:
mods = self.modifications(mod_type)
mod_key = mod_type.key()
if len(mods) > 0:
mods_dict = {}
for mod in mods:
if mod.vendor_code not in mods_dict:
mods_dict[mod.vendor_code] = mod.to_json_serializable(suppress_indent)
else:
if mod != mods_dict[mod.vendor_code]:
raise IllegalDesignError(
f"Modifications of type {mod_type} must have unique vendor codes, "
f"but I foundtwo different Modifications of that type "
f"that share vendor code "
f"{mod.vendor_code}:\n{mod}\nand\n"
f"{mods_dict[mod.vendor_code]}")
dct[mod_key] = mods_dict

dct[strands_key] = [strand.to_json_serializable(suppress_indent) for strand in self.strands]

Expand Down Expand Up @@ -5940,19 +5983,34 @@ def base_pairs(self, allow_mismatches: bool = False) -> Dict[int, List[int]]:

@staticmethod
def assign_modifications_to_strands(strands: List[Strand], strand_jsons: List[dict],
mods_5p: Dict[str, Modification5Prime],
mods_3p: Dict[str, Modification3Prime],
mods_int: Dict[str, ModificationInternal],
all_mods: Dict[str, Modification]) -> None:
if len(all_mods) > 0: # legacy code for when modifications were stored in a single dict
assert len(mods_5p) == 0 and len(mods_3p) == 0 and len(mods_int) == 0
legacy = True
elif len(mods_5p) > 0 or len(mods_3p) > 0 or len(mods_int) > 0:
assert len(all_mods) == 0
legacy = False
else: # no modifications
return

for strand, strand_json in zip(strands, strand_jsons):
if modification_5p_key in strand_json:
mod_name = strand_json[modification_5p_key]
strand.modification_5p = cast(Modification5Prime, all_mods[mod_name])
mod_code = strand_json[modification_5p_key]
strand.modification_5p = cast(Modification5Prime, all_mods[mod_code]) \
if legacy else mods_5p[mod_code]
if modification_3p_key in strand_json:
mod_name = strand_json[modification_3p_key]
strand.modification_3p = cast(Modification3Prime, all_mods[mod_name])
mod_code = strand_json[modification_3p_key]
strand.modification_3p = cast(Modification3Prime, all_mods[mod_code]) \
if legacy else mods_3p[mod_code]
if modifications_int_key in strand_json:
mod_names_by_offset = strand_json[modifications_int_key]
for offset_str, mod_name in mod_names_by_offset.items():
for offset_str, mod_code in mod_names_by_offset.items():
offset = int(offset_str)
strand.modifications_int[offset] = cast(ModificationInternal, all_mods[mod_name])
strand.modifications_int[offset] = cast(ModificationInternal, all_mods[mod_code]) \
if legacy else mods_int[mod_code]

@staticmethod
def _cadnano_v2_import_find_5_end(vstrands: VStrands, strand_type: str, helix_num: int, base_id: int,
Expand Down Expand Up @@ -6079,7 +6137,7 @@ def _cadnano_v2_import_explore_domains(vstrands: VStrands, seen: Dict[Tuple[int,
@staticmethod
def _cadnano_v2_import_circular_strands_merge_first_last_domains(domains: List[Domain]) -> None:
""" When we create domains for circular strands in the cadnano import routine, we may end up
with a fake crossover if first and last domain are on same helix, we have to merge them
with a fake crossover if first and last domain are on same helix, we have to merge them
if it is the case.
"""
if domains[0].helix != domains[-1].helix:
Expand Down Expand Up @@ -6210,9 +6268,9 @@ def from_cadnano_v2(directory: str = '', filename: Optional[str] = None,
# TS: Dave, I have thoroughly checked the code of Design constructor and the order of the helices
# IS lost even if the helices were given as a list.
# Indeed, you very early call `_normalize_helices_as_dict` in the constructor the order is lost.
# Later in the code, if no view order was given the code will choose the identity
# Later in the code, if no view order was given the code will choose the identity
# in function `_check_helices_view_order_and_return`.
# Conclusion: do not assume that your constructor code deals with the ordering, even if
# Conclusion: do not assume that your constructor code deals with the ordering, even if
# input helices is a list. I am uncommenting the below:
design.set_helices_view_order([num for num in helices])

Expand Down Expand Up @@ -7641,7 +7699,7 @@ def _write_plates_default(self, directory: str, filename: Optional[str], strands

# IDT charges extra for a plate with < 24 strands for 96-well plate
# or < 96 strands for 384-well plate.
# So if we would have fewer than that many on the last plate,
# So if we would have fewer than that many on the last plate,
# shift some from the penultimate plate.
if not on_final_plate and \
final_plate_less_than_min_required and \
Expand Down Expand Up @@ -7670,7 +7728,7 @@ def to_oxview_format(self, warn_duplicate_strand_names: bool = True,
have duplicate names. (default: True)
:param use_strand_colors:
if True (default), sets the color of each nucleotide in a strand in oxView to the color
of the strand.
of the strand.
"""
import datetime
self._check_legal_design(warn_duplicate_strand_names)
Expand Down Expand Up @@ -8184,7 +8242,8 @@ def ligate(self, helix: int, offset: int, forward: bool) -> None:
strand_3p.domains.append(dom_new)
strand_3p.domains.extend(strand_5p.domains[1:])
strand_3p.is_scaffold = strand_left.is_scaffold or strand_right.is_scaffold
strand_3p.set_modification_3p(strand_5p.modification_3p)
if strand_5p.modification_3p is not None:
strand_3p.set_modification_3p(strand_5p.modification_3p)
for idx, mod in strand_5p.modifications_int.items():
new_idx = idx + strand_3p.dna_length()
strand_3p.set_modification_internal(new_idx, mod)
Expand Down
33 changes: 19 additions & 14 deletions tests/scadnano_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,16 +637,17 @@ def test_to_json__names_unique_for_modifications_raises_no_error(self) -> None:
sc.Modification3Prime(display_text=name, vendor_code=name + '3'))
design.to_json(True)

def test_to_json__names_not_unique_for_modifications_raises_error(self) -> None:
def test_to_json__names_not_unique_for_modifications_5p_raises_error(self) -> None:
helices = [sc.Helix(max_offset=100)]
design: sc.Design = sc.Design(helices=helices, strands=[], grid=sc.square)
name = 'mod_name'
code1 = 'mod_code1'
code2 = 'mod_code2'
design.draw_strand(0, 0).move(5).with_modification_5p(
sc.Modification5Prime(display_text=name, vendor_code=name))
design.draw_strand(0, 5).move(5).with_modification_3p(
sc.Modification3Prime(display_text=name, vendor_code=name))
sc.Modification5Prime(display_text=code1, vendor_code=code1))
design.draw_strand(0, 5).move(5).with_modification_5p(
sc.Modification5Prime(display_text=code2, vendor_code=code1))
with self.assertRaises(sc.IllegalDesignError):
design.to_json(True)
design.to_json(False)

def test_mod_illegal_exceptions_raised(self) -> None:
strand = sc.Strand(domains=[sc.Domain(0, True, 0, 5)], dna_sequence='AATGC')
Expand Down Expand Up @@ -793,18 +794,22 @@ def test_to_json_serializable(self) -> None:
# print(design.to_json())

json_dict = design.to_json_serializable(suppress_indent=False)
self.assertTrue(sc.design_modifications_key in json_dict)
mods_dict = json_dict[sc.design_modifications_key]
self.assertTrue(r'/5Biosg/' in mods_dict)
self.assertTrue(r'/3Bio/' in mods_dict)
self.assertTrue(r'/iBiodT/' in mods_dict)

biotin5_json = mods_dict[r'/5Biosg/']
self.assertTrue(sc.design_modifications_5p_key in json_dict)
self.assertTrue(sc.design_modifications_3p_key in json_dict)
self.assertTrue(sc.design_modifications_int_key in json_dict)
mods_5p_dict = json_dict[sc.design_modifications_5p_key]
self.assertTrue(r'/5Biosg/' in mods_5p_dict)
mods_3p_dict = json_dict[sc.design_modifications_3p_key]
self.assertTrue(r'/3Bio/' in mods_3p_dict)
mods_int_dict = json_dict[sc.design_modifications_int_key]
self.assertTrue(r'/iBiodT/' in mods_int_dict)

biotin5_json = mods_5p_dict[r'/5Biosg/']
self.assertEqual('/5Biosg/', biotin5_json[sc.mod_vendor_code_key])
self.assertEqual('B', biotin5_json[sc.mod_display_text_key])
self.assertEqual(6, biotin5_json[sc.mod_connector_length_key])

biotin3_json = mods_dict[r'/3Bio/']
biotin3_json = mods_3p_dict[r'/3Bio/']
self.assertEqual('/3Bio/', biotin3_json[sc.mod_vendor_code_key])
self.assertEqual('B', biotin3_json[sc.mod_display_text_key])
self.assertNotIn(sc.mod_connector_length_key, biotin3_json)
Expand Down

0 comments on commit e4cea7c

Please sign in to comment.