diff --git a/README.md b/README.md index 751cd42..45ed5ff 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Adds cloning and recursion functions to python. [#96](https://github.com/Koeng101/dnadesign/pull/96) - Adds recursive fragmentation. [#92](https://github.com/Koeng101/dnadesign/pull/92) - Updated megamash documentation to be more specific. [#91](https://github.com/Koeng101/dnadesign/pull/91) - Adds automatic python documentation generation. [#88](https://github.com/Koeng101/dnadesign/pull/88) diff --git a/lib/clone/example_test.go b/lib/clone/example_test.go index c0f7fd4..d07d23b 100644 --- a/lib/clone/example_test.go +++ b/lib/clone/example_test.go @@ -5,7 +5,6 @@ import ( "log" "github.com/koeng101/dnadesign/lib/clone" - "github.com/koeng101/dnadesign/lib/seqhash" ) func ExampleGoldenGate() { @@ -24,6 +23,6 @@ func ExampleGoldenGate() { log.Fatalf("Failed to GoldenGate. 
Got error: %s", err) } - fmt.Println(seqhash.RotateSequence(plasmid)) - // Output: AAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAG
ATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAG + fmt.Println(plasmid) + // Output: 
GGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCC
TTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTG } diff --git a/py/dnadesign/clone.py b/py/dnadesign/clone.py new file mode 100644 index 0000000..efbffcc --- /dev/null +++ b/py/dnadesign/clone.py @@ -0,0 +1,69 @@ +from typing import List, Optional +from .cffi_bindings import ffi, lib + +class Part: + def __init__(self, sequence: str, circular: bool): + self.sequence = sequence + 
self.circular = circular + +class Fragment: + def __init__(self, sequence: str, forward_overhang: str, reverse_overhang: str): + self.sequence = sequence + self.forward_overhang = forward_overhang + self.reverse_overhang = reverse_overhang + +def _create_c_string(python_string: str): + return ffi.new("char[]", python_string.encode('utf-8')) + +def _create_c_part(part: Part): + return {"sequence": _create_c_string(part.sequence), "circular": ffi.cast("int", int(part.circular))} + +def _create_c_fragment(fragment: Fragment): + return { + "sequence": _create_c_string(fragment.sequence), + "forward_overhang": _create_c_string(fragment.forward_overhang), + "reverse_overhang": _create_c_string(fragment.reverse_overhang) + } + +def _fragment_from_c(c_fragment): + return Fragment( + ffi.string(c_fragment.sequence).decode('utf-8'), + ffi.string(c_fragment.forward_overhang).decode('utf-8'), + ffi.string(c_fragment.reverse_overhang).decode('utf-8') + ) + +def cut_with_enzyme_by_name(part: Part, directional: bool, name: str, methylated: bool) -> List[Fragment]: + c_part = ffi.new("Part*", _create_c_part(part)) + c_name = _create_c_string(name) + c_directional = ffi.cast("int", int(directional)) + c_methylated = ffi.cast("int", int(methylated)) + + result = lib.CutWithEnzymeByName(c_part[0], c_directional, c_name, c_methylated) + if result.error != ffi.NULL: + raise Exception(ffi.string(result.error).decode('utf-8')) + + fragments = [_fragment_from_c(result.fragments[i]) for i in range(result.size)] + return fragments + +def ligate(fragments: List[Fragment], circular: bool) -> str: + c_fragments = ffi.new("Fragment[]", [_create_c_fragment(f) for f in fragments]) + c_fragment_count = ffi.cast("int", len(fragments)) + c_circular = ffi.cast("int", int(circular)) + + result = lib.Ligate(c_fragments, c_fragment_count, c_circular) + if result.error != ffi.NULL: + raise Exception(ffi.string(result.error).decode('utf-8')) + + return ffi.string(result.ligation).decode('utf-8') + +def 
golden_gate(sequences: List[Part], cutting_enzyme_name: str, methylated: bool) -> str: + c_parts = ffi.new("Part[]", [_create_c_part(part) for part in sequences]) + c_sequence_count = ffi.cast("int", len(sequences)) + c_cutting_enzyme_name = _create_c_string(cutting_enzyme_name) + c_methylated = ffi.cast("int", int(methylated)) + + result = lib.GoldenGate(c_parts, c_sequence_count, c_cutting_enzyme_name, c_methylated) + if result.error != ffi.NULL: + raise Exception(ffi.string(result.error).decode('utf-8')) + + return ffi.string(result.ligation).decode('utf-8') diff --git a/py/dnadesign/definitions.h b/py/dnadesign/definitions.h index 1602e30..ce1b8d2 100644 --- a/py/dnadesign/definitions.h +++ b/py/dnadesign/definitions.h @@ -137,3 +137,79 @@ typedef struct { GenbankResult ParseGenbankFromCFile(void* cfile); GenbankResult ParseGenbankFromCString(char* cstring); + +// Part, Fragment, and Assembly definitions +typedef struct { + char* sequence; + int circular; +} Part; + +typedef struct { + char* sequence; + char* forward_overhang; + char* reverse_overhang; +} Fragment; + +typedef struct { + char* sequence; + char** fragments; + int fragmentCount; + double efficiency; + void* subAssemblies; + int subAssemblyCount; +} Assembly; + +// New struct definitions for function outputs +typedef struct { + Fragment* fragments; + int size; + char* error; +} FragmentResult; + +typedef struct { + char* ligation; + int* ligationPattern; + int ligationPatternSize; + char* error; +} LigationResult; + +typedef struct { + char** overhangs; + double* efficiencies; + int size; + char* error; +} OverhangResult; + +typedef struct { + char** fragments; + int size; + double efficiency; + char* error; +} FragmentSequenceResult; + +typedef struct { + Assembly* assembly; + char* error; +} RecursiveFragmentSequenceResult; + +// Function declarations +FragmentResult CutWithEnzymeByName(Part part, int directional, char* name, + int methylated); +LigationResult Ligate(Fragment* fragments, int 
fragmentCount, int circular); +LigationResult GoldenGate(Part* sequences, int sequenceCount, + char* cuttingEnzymeName, int methylated); +double SetEfficiency(char** overhangs, int overhangCount); +OverhangResult NextOverhangs(char** currentOverhangs, int overhangCount); +char* NextOverhang(char** currentOverhangs, int overhangCount); +FragmentSequenceResult FragmentSequence(char* sequence, int minFragmentSize, + int maxFragmentSize, + char** excludeOverhangs, + int excludeOverhangCount); +FragmentSequenceResult FragmentSequenceWithOverhangs( + char* sequence, int minFragmentSize, int maxFragmentSize, + char** excludeOverhangs, int excludeOverhangCount, char** includeOverhangs, + int includeOverhangCount); +RecursiveFragmentSequenceResult RecursiveFragmentSequence( + char* sequence, int maxCodingSizeOligo, int* assemblyPattern, + int patternCount, char** excludeOverhangs, int excludeCount, + char** includeOverhangs, int includeCount); diff --git a/py/dnadesign/fragment.py b/py/dnadesign/fragment.py new file mode 100644 index 0000000..cf966cb --- /dev/null +++ b/py/dnadesign/fragment.py @@ -0,0 +1,90 @@ +from typing import List, Tuple +from .cffi_bindings import ffi, lib + +class Assembly: + def __init__(self, sequence: str, fragments: List[str], efficiency: float, sub_assemblies: List['Assembly']): + self.sequence = sequence + self.fragments = fragments + self.efficiency = efficiency + self.sub_assemblies = sub_assemblies + +def _create_c_string_array(python_strings: List[str]): + c_strings = [ffi.new("char[]", s.encode('utf-8')) for s in python_strings] + c_array = ffi.new("char *[]", c_strings) + return c_array, c_strings # Return c_strings to keep them alive + +def _c_string_array_to_python(c_array, size): + return [ffi.string(c_array[i]).decode('utf-8') for i in range(size)] + +def set_efficiency(overhangs: List[str]) -> float: + c_overhangs, _ = _create_c_string_array(overhangs) + return lib.SetEfficiency(c_overhangs, len(overhangs)) + +def 
next_overhangs(current_overhangs: List[str]) -> Tuple[List[str], List[float]]: + c_overhangs, _ = _create_c_string_array(current_overhangs) + result = lib.NextOverhangs(c_overhangs, len(current_overhangs)) + + if result.error != ffi.NULL: + raise Exception(ffi.string(result.error).decode('utf-8')) + + overhangs = _c_string_array_to_python(result.overhangs, result.size) + efficiencies = [result.efficiencies[i] for i in range(result.size)] + return overhangs, efficiencies + +def next_overhang(current_overhangs: List[str]) -> str: + c_overhangs, _ = _create_c_string_array(current_overhangs) + result = lib.NextOverhang(c_overhangs, len(current_overhangs)) + return ffi.string(result).decode('utf-8') + +def fragment(sequence: str, min_fragment_size: int, max_fragment_size: int, exclude_overhangs: List[str]) -> Tuple[List[str], float, str]: + c_sequence = ffi.new("char[]", sequence.encode('utf-8')) + c_exclude_overhangs, _ = _create_c_string_array(exclude_overhangs) + + result = lib.FragmentSequence(c_sequence, min_fragment_size, max_fragment_size, c_exclude_overhangs, len(exclude_overhangs)) + + if result.error != ffi.NULL: + error = ffi.string(result.error).decode('utf-8') + return [], 0.0, error + + fragments = _c_string_array_to_python(result.fragments, result.size) + return fragments, result.efficiency, None + +def fragment_with_overhangs(sequence: str, min_fragment_size: int, max_fragment_size: int, + exclude_overhangs: List[str], include_overhangs: List[str]) -> Tuple[List[str], float, str]: + c_sequence = ffi.new("char[]", sequence.encode('utf-8')) + c_exclude_overhangs, _ = _create_c_string_array(exclude_overhangs) + c_include_overhangs, _ = _create_c_string_array(include_overhangs) + + result = lib.FragmentSequenceWithOverhangs(c_sequence, min_fragment_size, max_fragment_size, + c_exclude_overhangs, len(exclude_overhangs), + c_include_overhangs, len(include_overhangs)) + + if result.error != ffi.NULL: + error = ffi.string(result.error).decode('utf-8') + return 
[], 0.0, error + + fragments = _c_string_array_to_python(result.fragments, result.size) + return fragments, result.efficiency, None + +def _assembly_from_c(c_assembly) -> Assembly: + sequence = ffi.string(c_assembly.sequence).decode('utf-8') + fragments = _c_string_array_to_python(c_assembly.fragments, c_assembly.fragmentCount) + efficiency = c_assembly.efficiency + sub_assemblies = [_assembly_from_c(c_assembly.subAssemblies[i]) for i in range(c_assembly.subAssemblyCount)] + return Assembly(sequence, fragments, efficiency, sub_assemblies) + +def recursive_fragment(sequence: str, max_coding_size_oligo: int, assembly_pattern: List[int], + exclude_overhangs: List[str], include_overhangs: List[str]) -> Assembly: + c_sequence = ffi.new("char[]", sequence.encode('utf-8')) + c_assembly_pattern = ffi.new("int[]", assembly_pattern) + c_exclude_overhangs, _ = _create_c_string_array(exclude_overhangs) + c_include_overhangs, _ = _create_c_string_array(include_overhangs) + + result = lib.RecursiveFragmentSequence(c_sequence, max_coding_size_oligo, c_assembly_pattern, len(assembly_pattern), + c_exclude_overhangs, len(exclude_overhangs), + c_include_overhangs, len(include_overhangs)) + + if result.error != ffi.NULL: + raise Exception(ffi.string(result.error).decode('utf-8')) + + return _assembly_from_c(result.assembly) diff --git a/py/lib.go b/py/lib.go index e6a5aef..bc5acd2 100644 --- a/py/lib.go +++ b/py/lib.go @@ -112,15 +112,41 @@ typedef struct { int feature_count; char* sequence; } Genbank; + +// Part +typedef struct { + char* sequence; + int circular; +} Part; + +// Fragment +typedef struct { + char* sequence; + char* forward_overhang; + char* reverse_overhang; +} Fragment; + +// Assembly +typedef struct { + char* sequence; + char** fragments; + int fragmentCount; + double efficiency; + void* subAssemblies; + int subAssemblyCount; +} Assembly; */ import "C" import ( + "fmt" "io" "strings" "unsafe" "github.com/koeng101/dnadesign/lib/bio" 
"github.com/koeng101/dnadesign/lib/bio/genbank" + "github.com/koeng101/dnadesign/lib/clone" + "github.com/koeng101/dnadesign/lib/synthesis/fragment" ) /****************************************************************************** @@ -433,6 +459,268 @@ func ParseGenbankFromCString(cstring *C.char) (*C.Genbank, int, *C.char) { return goGenbankToCGenbank(genbanks) } +/****************************************************************************** +Aug 28, 2024 + +Clone Package Functions + +******************************************************************************/ + +//export CutWithEnzymeByName +func CutWithEnzymeByName(part C.Part, directional C.int, name *C.char, methylated C.int) (*C.Fragment, C.int, *C.char) { + goPart := clone.Part{ + Sequence: C.GoString(part.sequence), + Circular: part.circular != 0, + } + fragments, err := clone.CutWithEnzymeByName(goPart, directional != 0, C.GoString(name), methylated != 0) + if err != nil { + return nil, 0, C.CString(err.Error()) + } + + cFragments := (*C.Fragment)(C.malloc(C.size_t(len(fragments)) * C.size_t(unsafe.Sizeof(C.Fragment{})))) + slice := (*[1<<30 - 1]C.Fragment)(unsafe.Pointer(cFragments))[:len(fragments):len(fragments)] + + for i, frag := range fragments { + slice[i].sequence = C.CString(frag.Sequence) + slice[i].forward_overhang = C.CString(frag.ForwardOverhang) + slice[i].reverse_overhang = C.CString(frag.ReverseOverhang) + } + + return cFragments, C.int(len(fragments)), nil +} + +//export Ligate +func Ligate(fragments *C.Fragment, fragmentCount C.int, circular C.int) (*C.char, *C.int, C.int, *C.char) { + goFragments := make([]clone.Fragment, int(fragmentCount)) + slice := (*[1<<30 - 1]C.Fragment)(unsafe.Pointer(fragments))[:fragmentCount:fragmentCount] + + for i := 0; i < int(fragmentCount); i++ { + goFragments[i] = clone.Fragment{ + Sequence: C.GoString(slice[i].sequence), + ForwardOverhang: C.GoString(slice[i].forward_overhang), + ReverseOverhang: C.GoString(slice[i].reverse_overhang), + } + } + + 
ligation, ligationPattern, err := clone.Ligate(goFragments, circular != 0) + if err != nil { + return nil, nil, 0, C.CString(err.Error()) + } + + cLigation := C.CString(ligation) + cLigationPattern := (*C.int)(C.malloc(C.size_t(len(ligationPattern)) * C.sizeof_int)) + cLigationPatternSlice := (*[1<<30 - 1]C.int)(unsafe.Pointer(cLigationPattern))[:len(ligationPattern):len(ligationPattern)] + for i, v := range ligationPattern { + cLigationPatternSlice[i] = C.int(v) + } + + return cLigation, cLigationPattern, C.int(len(ligationPattern)), nil +} + +//export GoldenGate +func GoldenGate(sequences *C.Part, sequenceCount C.int, cuttingEnzymeName *C.char, methylated C.int) (*C.char, *C.int, C.int, *C.char) { + goParts := make([]clone.Part, int(sequenceCount)) + slice := (*[1<<30 - 1]C.Part)(unsafe.Pointer(sequences))[:sequenceCount:sequenceCount] + + for i := 0; i < int(sequenceCount); i++ { + goParts[i] = clone.Part{ + Sequence: C.GoString(slice[i].sequence), + Circular: slice[i].circular != 0, + } + } + + // Look up the cutting enzyme by name + enzymeName := C.GoString(cuttingEnzymeName) + cuttingEnzyme, ok := clone.DefaultEnzymes[enzymeName] + if !ok { + return nil, nil, 0, C.CString(fmt.Sprintf("Unknown enzyme: %s", enzymeName)) + } + + result, pattern, err := clone.GoldenGate(goParts, cuttingEnzyme, methylated != 0) + if err != nil { + return nil, nil, 0, C.CString(err.Error()) + } + + cResult := C.CString(result) + cPattern := (*C.int)(C.malloc(C.size_t(len(pattern)) * C.sizeof_int)) + cPatternSlice := (*[1<<30 - 1]C.int)(unsafe.Pointer(cPattern))[:len(pattern):len(pattern)] + for i, v := range pattern { + cPatternSlice[i] = C.int(v) + } + + return cResult, cPattern, C.int(len(pattern)), nil +} + +/****************************************************************************** +Aug 28, 2024 + +Fragment Package Functions + +******************************************************************************/ + +//export SetEfficiency +func SetEfficiency(overhangs **C.char, 
overhangCount C.int) C.double { + goOverhangs := make([]string, int(overhangCount)) + slice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(overhangs))[:overhangCount:overhangCount] + + for i := 0; i < int(overhangCount); i++ { + goOverhangs[i] = C.GoString(slice[i]) + } + + return C.double(fragment.SetEfficiency(goOverhangs)) +} + +//export NextOverhangs +func NextOverhangs(currentOverhangs **C.char, overhangCount C.int) (**C.char, *C.double, C.int, *C.char) { + goCurrentOverhangs := make([]string, int(overhangCount)) + slice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(currentOverhangs))[:overhangCount:overhangCount] + + for i := 0; i < int(overhangCount); i++ { + goCurrentOverhangs[i] = C.GoString(slice[i]) + } + + nextOverhangs, efficiencies := fragment.NextOverhangs(goCurrentOverhangs) + + cNextOverhangs := (**C.char)(C.malloc(C.size_t(len(nextOverhangs)) * C.size_t(unsafe.Sizeof(uintptr(0))))) + cNextOverhangsSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(cNextOverhangs))[:len(nextOverhangs):len(nextOverhangs)] + + cEfficiencies := (*C.double)(C.malloc(C.size_t(len(efficiencies)) * C.sizeof_double)) + cEfficienciesSlice := (*[1<<30 - 1]C.double)(unsafe.Pointer(cEfficiencies))[:len(efficiencies):len(efficiencies)] + + for i, overhang := range nextOverhangs { + cNextOverhangsSlice[i] = C.CString(overhang) + cEfficienciesSlice[i] = C.double(efficiencies[i]) + } + + return cNextOverhangs, cEfficiencies, C.int(len(nextOverhangs)), nil +} + +//export NextOverhang +func NextOverhang(currentOverhangs **C.char, overhangCount C.int) *C.char { + goCurrentOverhangs := make([]string, int(overhangCount)) + slice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(currentOverhangs))[:overhangCount:overhangCount] + + for i := 0; i < int(overhangCount); i++ { + goCurrentOverhangs[i] = C.GoString(slice[i]) + } + + return C.CString(fragment.NextOverhang(goCurrentOverhangs)) +} + +//export FragmentSequence +func FragmentSequence(sequence *C.char, minFragmentSize C.int, maxFragmentSize C.int, 
excludeOverhangs **C.char, excludeOverhangCount C.int) (**C.char, C.int, C.double, *C.char) { + goSequence := C.GoString(sequence) + goExcludeOverhangs := make([]string, int(excludeOverhangCount)) + slice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(excludeOverhangs))[:excludeOverhangCount:excludeOverhangCount] + + for i := 0; i < int(excludeOverhangCount); i++ { + goExcludeOverhangs[i] = C.GoString(slice[i]) + } + + fragments, efficiency, err := fragment.Fragment(goSequence, int(minFragmentSize), int(maxFragmentSize), goExcludeOverhangs) + if err != nil { + return nil, 0, 0, C.CString(err.Error()) + } + + cFragments := (**C.char)(C.malloc(C.size_t(len(fragments)) * C.size_t(unsafe.Sizeof(uintptr(0))))) + cFragmentsSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(cFragments))[:len(fragments):len(fragments)] + + for i, frag := range fragments { + cFragmentsSlice[i] = C.CString(frag) + } + + return cFragments, C.int(len(fragments)), C.double(efficiency), nil +} + +//export FragmentSequenceWithOverhangs +func FragmentSequenceWithOverhangs(sequence *C.char, minFragmentSize C.int, maxFragmentSize C.int, excludeOverhangs **C.char, excludeOverhangCount C.int, includeOverhangs **C.char, includeOverhangCount C.int) (**C.char, C.int, C.double, *C.char) { + goSequence := C.GoString(sequence) + goExcludeOverhangs := make([]string, int(excludeOverhangCount)) + goIncludeOverhangs := make([]string, int(includeOverhangCount)) + + excludeSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(excludeOverhangs))[:excludeOverhangCount:excludeOverhangCount] + includeSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(includeOverhangs))[:includeOverhangCount:includeOverhangCount] + + for i := 0; i < int(excludeOverhangCount); i++ { + goExcludeOverhangs[i] = C.GoString(excludeSlice[i]) + } + + for i := 0; i < int(includeOverhangCount); i++ { + goIncludeOverhangs[i] = C.GoString(includeSlice[i]) + } + + fragments, efficiency, err := fragment.FragmentWithOverhangs(goSequence, int(minFragmentSize), 
int(maxFragmentSize), goExcludeOverhangs, goIncludeOverhangs) + if err != nil { + return nil, 0, 0, C.CString(err.Error()) + } + + cFragments := (**C.char)(C.malloc(C.size_t(len(fragments)) * C.size_t(unsafe.Sizeof(uintptr(0))))) + cFragmentsSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(cFragments))[:len(fragments):len(fragments)] + + for i, frag := range fragments { + cFragmentsSlice[i] = C.CString(frag) + } + + return cFragments, C.int(len(fragments)), C.double(efficiency), nil +} + +//export RecursiveFragmentSequence +func RecursiveFragmentSequence(sequence *C.char, maxCodingSizeOligo C.int, assemblyPattern *C.int, patternCount C.int, excludeOverhangs **C.char, excludeCount C.int, includeOverhangs **C.char, includeCount C.int) (*C.Assembly, *C.char) { + goSequence := C.GoString(sequence) + goAssemblyPattern := make([]int, patternCount) + goExcludeOverhangs := make([]string, excludeCount) + goIncludeOverhangs := make([]string, includeCount) + patternSlice := (*[1<<30 - 1]C.int)(unsafe.Pointer(assemblyPattern))[:patternCount:patternCount] + excludeSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(excludeOverhangs))[:excludeCount:excludeCount] + includeSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(includeOverhangs))[:includeCount:includeCount] + for i := 0; i < int(patternCount); i++ { + goAssemblyPattern[i] = int(patternSlice[i]) + } + for i := 0; i < int(excludeCount); i++ { + goExcludeOverhangs[i] = C.GoString(excludeSlice[i]) + } + for i := 0; i < int(includeCount); i++ { + goIncludeOverhangs[i] = C.GoString(includeSlice[i]) + } + assembly, err := fragment.RecursiveFragment(goSequence, int(maxCodingSizeOligo), goAssemblyPattern, goExcludeOverhangs, goIncludeOverhangs) + if err != nil { + return nil, C.CString(err.Error()) + } + return convertAssemblyToC(assembly), nil +} + +func convertAssemblyToC(assembly fragment.Assembly) *C.Assembly { + cAssembly := (*C.Assembly)(C.malloc(C.size_t(unsafe.Sizeof(C.Assembly{})))) + cAssembly.sequence = 
C.CString(assembly.Sequence) + + // Convert fragments + cFragments := (**C.char)(C.malloc(C.size_t(len(assembly.Fragments)) * C.size_t(unsafe.Sizeof(uintptr(0))))) + cFragmentsSlice := (*[1<<30 - 1]*C.char)(unsafe.Pointer(cFragments))[:len(assembly.Fragments):len(assembly.Fragments)] + for i, frag := range assembly.Fragments { + cFragmentsSlice[i] = C.CString(frag) + } + cAssembly.fragments = cFragments + cAssembly.fragmentCount = C.int(len(assembly.Fragments)) + + cAssembly.efficiency = C.double(assembly.Efficiency) + + // Convert sub-assemblies recursively + if len(assembly.SubAssemblies) > 0 { + cSubAssemblies := (*C.Assembly)(C.malloc(C.size_t(len(assembly.SubAssemblies)) * C.size_t(unsafe.Sizeof(C.Assembly{})))) + cSubAssembliesSlice := (*[1<<30 - 1]C.Assembly)(unsafe.Pointer(cSubAssemblies))[:len(assembly.SubAssemblies):len(assembly.SubAssemblies)] + for i, subAssembly := range assembly.SubAssemblies { + cSubAssembliesSlice[i] = *convertAssemblyToC(subAssembly) + } + cAssembly.subAssemblies = unsafe.Pointer(cSubAssemblies) + cAssembly.subAssemblyCount = C.int(len(assembly.SubAssemblies)) + } else { + cAssembly.subAssemblies = nil + cAssembly.subAssemblyCount = 0 + } + + return cAssembly +} + /****************************************************************************** main.go diff --git a/py/setup.py b/py/setup.py index 55a6320..fac3c0d 100644 --- a/py/setup.py +++ b/py/setup.py @@ -13,7 +13,7 @@ def get_shared_lib_ext(): setup( name='dnadesign', - version='0.1.5', + version='0.1.6', packages=find_packages(), package_data={'dnadesign': ['definitions.h', 'libdnadesign.h', "libdnadesign" + get_shared_lib_ext()]}, install_requires=[ diff --git a/py/tests/test_clone.py b/py/tests/test_clone.py new file mode 100644 index 0000000..943bee9 --- /dev/null +++ b/py/tests/test_clone.py @@ -0,0 +1,30 @@ +import pytest +from dnadesign.clone import Part, Fragment, cut_with_enzyme_by_name, ligate, golden_gate + +def test_cut_with_enzyme(): + bsai = "GGTCTCAATGC" + 
bsai_complement = "ATGCAGAGACC" + + # Test case of `<-bsaiComplement bsai-> <-bsaiComplement bsai->` where BsaI cuts off of a linear sequence + sequence = Part("ATATATA" + bsai_complement + bsai + "ATGCATCGATCGACTAGCATG" + bsai_complement + bsai[:8], False) + fragments = cut_with_enzyme_by_name(sequence, True, "BsaI", False) + + assert len(fragments) == 1, "CutWithEnzyme should produce 1 fragment" + assert fragments[0].sequence == "ATGCATCGATCGACTAGCATG", f"Unexpected fragment sequence: {fragments[0].sequence}" + +def test_circular_ligate(): + fragment1 = Fragment("AAAAAA", "GTTG", "CTAT") + fragment2 = Fragment("AAAAAA", "CAAC", "ATAG") + output = ligate([fragment1, fragment2], True) + + assert output == "GTTGAAAAAACTATTTTTTT", "Unexpected ligation result" + +def test_golden_gate(): + fragment1 = Part("GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", False) + fragment2 = 
Part("GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", False) + popen = Part("TAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGC
GACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAGTCTTCGCCATCGCTACTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGATTTTTGCGGTATAAGAATATATACTGATATGTATACCCGAAGTATGTCAAAAAGAGGTATGCTATGAAGCAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATGAAGCCCGTCGTCTGCGTGCCGAACGCTGGAAAGCGGAAAATCAGGAAGGGATGGCTGAGGTCGCCCGGTTTATTGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGGCGATGTTGAAGACCACGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGG", True) + + plasmid = golden_gate([fragment1, fragment2, popen], "BbsI", False) + + expected = 
"GGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGC
CTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTG" + assert plasmid == expected, "unexpected goldengate" diff --git a/py/tests/test_fragment.py b/py/tests/test_fragment.py new file mode 100644 index 0000000..0a7b203 --- /dev/null +++ b/py/tests/test_fragment.py @@ -0,0 +1,44 @@ +import pytest +from dnadesign.fragment import fragment, next_overhang, recursive_fragment + +def 
test_basic_fragment(): + lacZ = "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" + fragments, _, _ = fragment(lacZ, 95, 105, ["AAAA"]) + + expected_fragments = [ + "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGG", + "CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAAC", + "CAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATC", + "CATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" + ] + + assert fragments == expected_fragments, "Unexpected fragmentation result" + +def test_next_overhang(): + primer_overhangs = ["ATAA"] + primer_overhangs.append(next_overhang(primer_overhangs)) + primer_overhangs.append(next_overhang(primer_overhangs)) + primer_overhangs.append(next_overhang(primer_overhangs)) + + expected_overhangs = ["ATAA", "AAAT", "AATA", "AAGA"] + assert primer_overhangs == expected_overhangs, "Unexpected overhang generation" + +def test_fragment_efficiency(): + lacZ = "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" + fragments, efficiency, _ = fragment(lacZ, 95, 105, []) + + assert len(fragments) > 1, "Expected multiple fragments" + assert fragments[1] == "CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACA", "Unexpected second fragment" + assert efficiency == pytest.approx(1.0, abs=1e-6), "Unexpected efficiency" + +def 
test_recursive_fragment(): + # These are the 46 candidate overhangs; the two identity overhangs CGAG+GTCT are not in this list and are passed separately via exclude_overhangs + default_overhangs = ["GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"] + exclude_overhangs = ["CGAG", "GTCT"] # These are the recursive BsaI definitions, and must be excluded from all builds. + gene = "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" + max_oligo_len = 174 # for Agilent oligo pools + assembly_pattern = [5, 4, 4, 5] # seems reasonable enough + + result = recursive_fragment(gene, max_oligo_len, assembly_pattern, exclude_overhangs, default_overhangs) + assert result is not None, "RecursiveFragment failed" + # Add more specific assertions based on the expected structure of the result