Skip to content

Commit

Permalink
add an argument to 'AlignmentResult.AlignmentText()' and 'AlignmentRe…
Browse files Browse the repository at this point in the history
…sult.CIGAR'
  • Loading branch information
shenwei356 committed Oct 31, 2024
1 parent 25a8d89 commit 1eb3d93
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 17 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

- v0.4.0
- add an argument to `AlignmentResult.AlignmentText()` and `AlignmentResult.CIGAR`.
- wfa-go: add a new flag `-t` for only showing the aligned region.
- v0.3.1
- use a more efficient data structure to store cigar operations.
- slight speed improvement.
Expand Down
7 changes: 4 additions & 3 deletions wfa-go/wfa-go.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
"github.com/shenwei356/wfa"
)

var version = "0.3.1"
var version = "0.4.0"

func main() {
app := filepath.Base(os.Args[0])
Expand Down Expand Up @@ -72,6 +72,7 @@ Options/Flags:
noGlobal := flag.Bool("g", false, "do not use global alignment")
noAdaptive := flag.Bool("a", false, "do not use adaptive reduction")
noOutput := flag.Bool("N", false, "do not output alignment (for benchmark)")
trim := flag.Bool("t", false, "only show the aligned region")

pprofCPU := flag.Bool("p", false, "cpu pprof. go tool pprof -http=:8080 cpu.pprof")
pprofMem := flag.Bool("m", false, "mem pprof. go tool pprof -http=:8080 mem.pprof")
Expand Down Expand Up @@ -118,13 +119,13 @@ Options/Flags:
}

if !*noOutput {
Q, A, T := result.AlignmentText(&_q, &_t)
Q, A, T := result.AlignmentText(&_q, &_t, *trim)

// fmt.Fprintln(outfh, q, t)
fmt.Fprintf(outfh, "query %s\n", *Q)
fmt.Fprintf(outfh, " %s\n", *A)
fmt.Fprintf(outfh, "target %s\n", *T)
fmt.Fprintf(outfh, "cigar %s\n", result.CIGAR())
fmt.Fprintf(outfh, "cigar %s\n", result.CIGAR(*trim))
fmt.Fprintln(outfh)
fmt.Fprintf(outfh, "align-score : %d\n", result.Score)
fmt.Fprintf(outfh, "match-region: q[%d, %d]/%d vs t[%d, %d]/%d\n",
Expand Down
2 changes: 1 addition & 1 deletion wfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ func (algn *Aligner) backTrace(q, t *[]byte, s uint32, Ak int) *AlignmentResult
lenQ := len(*q)
lenT := len(*t)

cigar := NewAlignmentResult()
cigar := NewAlignmentResult(algn.opt.GlobalAlignment)
cigar.Score = s

var ok bool
Expand Down
60 changes: 49 additions & 11 deletions wfa_cigar.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ type AlignmentResult struct {
GapRegions uint32

proccessed bool

globalAlignment bool
}

// // CIGARRecord records the operation and the number.
Expand All @@ -64,9 +66,10 @@ const OpH = uint64('H')
const MaskLower32 = 4294967295

// NewAlignmentResult returns a new CIGAR from the object pool.
func NewAlignmentResult() *AlignmentResult {
func NewAlignmentResult(globalAlignment bool) *AlignmentResult {
	// Take an object from the pool and reset it before handing it out.
	result := poolCIGAR.Get().(*AlignmentResult)
	result.reset()

	// Record the alignment mode supplied by the caller.
	result.globalAlignment = globalAlignment

	return result
}

Expand Down Expand Up @@ -210,13 +213,36 @@ func (cigar *AlignmentResult) process() {
cigar.proccessed = true
}

// trimOps returns the sub-slice of ops that spans the first through the last
// match (OpM) operation, dropping any leading and trailing non-match
// operations so that only the aligned region remains.
//
// The operation code of each element is read from its upper 32 bits
// (op>>32 is compared against OpM). The returned slice shares its backing
// array with ops; nothing is copied.
//
// If ops contains no match operation (including when it is empty), nil is
// returned. Slicing with the "not found" sentinel would otherwise panic with
// an out-of-range slice bound.
func trimOps(ops []uint64) []uint64 {
	start := -1
	for i, op := range ops {
		if op>>32 == OpM {
			start = i
			break
		}
	}
	if start < 0 {
		// No match operation at all: there is no aligned region to keep.
		return nil
	}

	// A match exists at start, so the backward scan only needs to look at
	// the elements after it; if none matches, the region is that one element.
	end := start
	for i := len(ops) - 1; i > start; i-- {
		if ops[i]>>32 == OpM {
			end = i
			break
		}
	}
	return ops[start : end+1]
}

// CIGAR returns the CIGAR string.
func (cigar *AlignmentResult) CIGAR() string {
func (cigar *AlignmentResult) CIGAR(onlyAignedRegion bool) string {
cigar.process()
buf := poolBytesBuffer.Get().(*bytes.Buffer)
buf.Reset()

for _, op := range cigar.Ops {
ops := cigar.Ops
if onlyAignedRegion {
ops = trimOps(cigar.Ops)
}
for _, op := range ops {
// buf.WriteString(strconv.Itoa(int(op.N)))
buf.WriteString(strconv.Itoa(int(op & MaskLower32)))
// buf.WriteByte(op.Op)
Expand All @@ -230,9 +256,21 @@ func (cigar *AlignmentResult) CIGAR() string {

// AlignmentText returns the formatted alignment text for Query, Alignment, and Target.
// Do not forget to recycle them with RecycleAlignmentText().
func (cigar *AlignmentResult) AlignmentText(q, t *[]byte) (*[]byte, *[]byte, *[]byte) {
func (cigar *AlignmentResult) AlignmentText(q0, t0 *[]byte, onlyAignedRegion bool) (*[]byte, *[]byte, *[]byte) {
cigar.process()

var q, t []byte
ops := cigar.Ops
if !onlyAignedRegion {
q = *q0
t = *t0
} else {
q = (*q0)[cigar.QBegin-1 : cigar.QEnd]
t = (*t0)[cigar.TBegin-1 : cigar.TEnd]

ops = trimOps(cigar.Ops)
}

Q := poolBytes.Get().(*[]byte)
A := poolBytes.Get().(*[]byte)
T := poolBytes.Get().(*[]byte)
Expand All @@ -245,7 +283,7 @@ func (cigar *AlignmentResult) AlignmentText(q, t *[]byte) (*[]byte, *[]byte, *[]
v, h = 0, 0
// var i uint32
var i, n uint64
for _, op := range cigar.Ops {
for _, op := range ops {
n = op & MaskLower32

// switch op.Op {
Expand All @@ -254,19 +292,19 @@ func (cigar *AlignmentResult) AlignmentText(q, t *[]byte) (*[]byte, *[]byte, *[]
case OpM:
// for i = 0; i < op.N; i++ {
for i = 0; i < n; i++ {
*Q = append(*Q, (*q)[v])
*Q = append(*Q, q[v])
*A = append(*A, '|')
*T = append(*T, (*t)[h])
*T = append(*T, t[h])
v++
h++
}
// case 'X':
case OpX:
// for i = 0; i < op.N; i++ {
for i = 0; i < n; i++ {
*Q = append(*Q, (*q)[v])
*Q = append(*Q, q[v])
*A = append(*A, ' ')
*T = append(*T, (*t)[h])
*T = append(*T, t[h])
v++
h++
}
Expand All @@ -276,14 +314,14 @@ func (cigar *AlignmentResult) AlignmentText(q, t *[]byte) (*[]byte, *[]byte, *[]
for i = 0; i < n; i++ {
*Q = append(*Q, '-')
*A = append(*A, ' ')
*T = append(*T, (*t)[h])
*T = append(*T, t[h])
h++
}
// case 'D', 'H':
case OpD, OpH:
// for i = 0; i < op.N; i++ {
for i = 0; i < n; i++ {
*Q = append(*Q, (*q)[v])
*Q = append(*Q, q[v])
*A = append(*A, ' ')
*T = append(*T, '-')
v++
Expand Down
8 changes: 6 additions & 2 deletions wfa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ func TestWFA(_t *testing.T) {
// t = []byte("AAATTCTTGGTCAGAAAAGCAACCCCCCACAAGGTCGTCACACCTGGTTCACAGGGATCCTTCTGGTTAGCCAGTGCGAAGGCGATACGGGGGGAGAGCAGACAATTGCGTACAGGTTGAACTGTACTGGCTGAAAAAATTCCTGGCTCAACCCAAAGTTAAGTAGTATAATCGACAACGCAGTCTACAGATGCGGCCCGCGGATATAATTGTCCCCCGTGAAGTCACTGTGTCAAACGCGGAACGGTAATGCTCACCCCAATTCGTCGAAACAGTTAACTCCAGGCTAGATCTCACAAAACCGGAGCGGGACACCGATCACGATTTCACCCGTTATCGGAAGGCAGTTCTCGCTGCTGGTTACCTAGTGACTCACCCTAGCGGAACGTAGACTTGGTATCAAGCTTGCTTAACGCGTTCTCCCCCTTCAAGATAGTAAAATAACGCTGCCGTTTCATCCAAACTATTCTATATCCTTACCGTACTGGTTATTACTCTGGTAGACCGATCATGGTCAGACCACTATTTCACCAAGGGTAGCCACTCCATGCCGTAGGAGACACGCAAATAGCCACCGGGCATGTTGTAGACTGGAGAATCCCTTTGCCTCAGATCCGCATGTGCACTAAAGGCCCCTTGTAGCACGGAGAGATGGATTTTATGGAAGTTCTAAAAGTTTTGTTACTTCTTCCAACCTCCAACCCTGTATATGAAATACTTTCCAGTCACCGGGTTTCTATGCGCGTCCTTATTGCTCAAGGTAAGAAATCTCTGTCCATCAAATTCGAGTGAGTTATGCGTAATGTAAAACAAGTTACTGAGTTCAAAGCTGGTCCTGGTGATGGTGTCCCGTACACCCAAGAAAGTGATACTTAGCTTTACGCGTTAATGCCTCGGCAGACATCAGGTCCATACGAAGCAAGGAAAGCCCCGCCACTGCCACACTGGCTAGTCCCCCGAGTCACTCCCAAGCCTGGCAACCTCGATACCCAAGAGGAGTA")
// q = []byte("AAATTCTTGGTCAGAAAAGCAACCCCTCACAAGATCGTCACAGCTGGTTCAAGGGATCCTTCTGGTTAGCCAGTGCGAAGGCGATACGGGGGAGAGCAGACAATTGCGTCAGGTTGAACTGTCTGGCTGAAAAAATTCCTGGCCCAACCCAAAGTTAAGTAGTATAATCGGAAAGCAGTCTACAGATGCGGCCCGCGGATATAATTGTCCCCGTGAAGTCACTGTGTCAAACTCGGAACGGTAATGCTCACCCCAATTCATGCGAAACAGTTGAACTCCAGGCTAGATCCTCACAACCGGAGCGGGACACCGATCACGATTTCACCCGTTATCGGAAGGCATTCTCGCTGCTGGTTACCTAGTGACTCACGCCCTAGCGGAACGTAGACTTGGTATCAAGCTTGCTTAACGCGTTCTCCCCCTTCAGGATAGTAAATTAACGCTGCCGTTTCATCCAAACTATTCTTATACCTTACCGTACTGGTTATTACTCAGGAGTAGCCGAGTCATGGTCAGACCACTATTTCACCAAGGGTAGCCGATCCATGCCGTAGGAGACACGCAAATAGCCACCGGGCATGTTTGCTAGACTGGAGGATACCCTTTGCCTCAGATCCGCATGTGCACTAAAGGCCCCTTGTAGCACGGAAGATGGATTTTATGGCAAGTTCTAAAAGTTTTGTTACTTCTTCGCAACCTCCAACCCGTACTATGAAATACTTTCCAGTCCCGGGTTTCTATGCGCGTCCTTATTGCTCAAGGTAAGAAATCTCTGTCCATCAAATTCGAGTGAGTCATGCGTAATTTAAAACAAGTTACTGAGTTCAAAGCTGGTCCTGGTGATGGTGTCCCGTACACCCAGAGAAAGTGATACTTAGCTTTACGCGTTAATGCCTCGGCAGACATCAGGTCCATACGAAGCAAGGAAAGCCCCGCCACTGCCACACGGCTAGTCCCCCGAGTCACTCCCAAGCCTGGCAACCTCGATACCCAAGAGGGA")

// repeat sequence in the end of query
// q = []byte("GACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCGACTGCCTCAGTGCCCGGCGCTCAAGCCTCAAGCCTCAAGCCTCAGGTCTCGCAGCCCACCGCATTCACCCGTGACACCGAACTGCATCGCGAACGCATTTCTCGCCGCAGCCGCGCGCACGGGCGACGCGGACTTGCCGGCAAGCCCGCGCGCCGCCCGATGCGCG")
// t = []byte("GACTGCCGACTGCCGACTGCCGACTGCCTCAGTGCCCGGCGCTCAAGCCTCAAGCCTCAAGCCTCAGGCCTCAGGCCTCGCAGCCCACCGCATTCACCCGTGACACCGAACTTCATCGCGAACGCATTTCTCGCCGCAGCCGCGCGCGCAGGCGACGCGGACTTGCCGGCAAGCCCGCGCGCCGCCCGATGCGCG")

q = bytes.ToUpper(q)
t = bytes.ToUpper(t)
result, err := algn.Align(q, t)
Expand All @@ -159,8 +163,8 @@ func TestWFA(_t *testing.T) {

if result != nil {
fmt.Println()
fmt.Printf("CIGAR: %s\n", result.CIGAR())
Q, A, T := result.AlignmentText(&q, &t)
fmt.Printf("CIGAR: %s\n", result.CIGAR(false))
Q, A, T := result.AlignmentText(&q, &t, false)
fmt.Printf("query %s\n", *Q)
fmt.Printf(" %s\n", *A)
fmt.Printf("target %s\n", *T)
Expand Down

0 comments on commit 1eb3d93

Please sign in to comment.