Skip to content

Commit 489cf97

Browse files
Koeng101TimothyStiles
Koeng101
andauthored
Add FragmentWithOverhangs (#387)
* Add FragmentWithOverhangs * Fragment naming updated --------- Co-authored-by: Tim <[email protected]>
1 parent 96b5cff commit 489cf97

File tree

3 files changed

+44
-10
lines changed

3 files changed

+44
-10
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
- Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305)
1212
- Added a parser and writer for the `pileup` sequence alignment format (#329)
1313
- Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350)
14+
- Added option to fragmenter to fragment with only certain overhangs (#387)
15+
16+
17+
1418

1519
### Fixed
1620
- `fastq` parser no longer becomes de-aligned when reading (#325)

synthesis/fragment/fragment.go

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ func NextOverhang(currentOverhangs []string) string {
9898
}
9999

100100
// optimizeOverhangIteration takes in a sequence and optimally fragments it.
101-
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) {
101+
func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
102102
// If the sequence is smaller than maxFragment size, stop iteration.
103103
if len(sequence) < maxFragmentSize {
104104
existingFragments = append(existingFragments, sequence)
105-
return existingFragments, SetEfficiency(existingOverhangs), nil
105+
return existingFragments, SetEfficiency(excludeOverhangs), nil
106106
}
107107

108108
// Make sure minFragmentSize is not greater than maxFragmentSize
@@ -136,23 +136,35 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
136136
var bestOverhangEfficiency float64
137137
var bestOverhangPosition int
138138
var alreadyExists bool
139+
var buildAvailable bool
139140
for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ {
140141
// We go from max -> min, so we can maximize the size of our fragments
141142
overhangPosition := maxFragmentSize - overhangOffset
142143
overhangToTest := sequence[overhangPosition-4 : overhangPosition]
143144

144145
// Make sure overhang isn't already in set
145146
alreadyExists = false
146-
for _, existingOverhang := range existingOverhangs {
147-
if existingOverhang == overhangToTest || transform.ReverseComplement(existingOverhang) == overhangToTest {
147+
for _, excludeOverhang := range excludeOverhangs {
148+
if excludeOverhang == overhangToTest || transform.ReverseComplement(excludeOverhang) == overhangToTest {
148149
alreadyExists = true
149150
}
150151
}
151-
if !alreadyExists {
152+
// Make sure overhang is in set of includeOverhangs. If includeOverhangs is
153+
// empty, skip this check.
154+
buildAvailable = false
155+
if len(includeOverhangs) == 0 {
156+
buildAvailable = true
157+
}
158+
for _, includeOverhang := range includeOverhangs {
159+
if includeOverhang == overhangToTest || transform.ReverseComplement(includeOverhang) == overhangToTest {
160+
buildAvailable = true
161+
}
162+
}
163+
if !alreadyExists && buildAvailable {
152164
// See if this overhang is a palindrome
153165
if !checks.IsPalindromic(overhangToTest) {
154166
// Get this overhang set's efficiency
155-
setEfficiency := SetEfficiency(append(existingOverhangs, overhangToTest))
167+
setEfficiency := SetEfficiency(append(excludeOverhangs, overhangToTest))
156168

157169
// If this overhang is more efficient than any other found so far, set it as the best!
158170
if setEfficiency > bestOverhangEfficiency {
@@ -167,16 +179,24 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment
167179
return []string{}, float64(0), fmt.Errorf("bestOverhangPosition failed by equaling zero")
168180
}
169181
existingFragments = append(existingFragments, sequence[:bestOverhangPosition])
170-
existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
182+
excludeOverhangs = append(excludeOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition])
171183
sequence = sequence[bestOverhangPosition-4:]
172-
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs)
184+
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, excludeOverhangs, includeOverhangs)
173185
}
174186

175187
// Fragment fragments a sequence into fragments between the min and max size,
176188
// choosing fragment ends for optimal assembly efficiency. Since fragments will
177189
// be inserted into either a vector or primer binding sites, the first 4 and
178190
// last 4 base pairs are the initial overhang set.
179-
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) {
191+
func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string) ([]string, float64, error) {
192+
sequence = strings.ToUpper(sequence)
193+
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), []string{})
194+
}
195+
196+
// FragmentWithOverhangs fragments a sequence with only a certain overhang set.
197+
// This is useful if you are constraining the set of possible overhangs when
198+
// doing more advanced forms of cloning.
199+
func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) {
180200
sequence = strings.ToUpper(sequence)
181-
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...))
201+
return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), includeOverhangs)
182202
}

synthesis/fragment/fragment_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,13 @@ func TestRegressionTestMatching12(t *testing.T) {
8585
t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. Got: %g", efficiency)
8686
}
8787
}
88+
89+
func TestFragmentWithOverhangs(t *testing.T) {
90+
defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"}
91+
gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa"
92+
93+
_, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs)
94+
if err != nil {
95+
t.Errorf(err.Error())
96+
}
97+
}

0 commit comments

Comments
 (0)