diff --git a/align/align.go b/align/align.go index a824849..7df297d 100644 --- a/align/align.go +++ b/align/align.go @@ -7,6 +7,7 @@ import ( "log" "math" "math/rand" + "regexp" "sort" "strings" @@ -69,7 +70,7 @@ type Alignment interface { Rename(namemap map[string]string) Pssm(log bool, pseudocount float64, normalization int) (pssm map[rune][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA TrimNames(size int) (map[string]string, error) - CleanNames() // Removes spaces and tabs from sequence names + CleanNames() // Removes spaces and tabs at beginning and end of sequence names and replaces others by "_" TrimSequences(trimsize int, fromStart bool) error AppendSeqIdentifier(identifier string, right bool) AvgAllelesPerSite() float64 @@ -678,9 +679,12 @@ func (a *align) AppendSeqIdentifier(identifier string, right bool) { // Removes spaces and tabs from sequence names func (a *align) CleanNames() { + firstlast := regexp.MustCompile("(^[\\s\\t]+|[\\s\\t]+$)") + inside := regexp.MustCompile("[\\s\\t]+") + for _, seq := range a.seqs { - seq.name = strings.Replace(seq.name, " ", "", -1) - seq.name = strings.Replace(seq.name, "\t", "", -1) + seq.name = firstlast.ReplaceAllString(seq.name, "") + seq.name = inside.ReplaceAllString(seq.name, "_") } } diff --git a/align/align_test.go b/align/align_test.go index 9b879bf..61ecab6 100644 --- a/align/align_test.go +++ b/align/align_test.go @@ -45,6 +45,30 @@ func TestAppendIdentifier(t *testing.T) { }) } +func TestCleanNames(t *testing.T) { + a, err := RandomAlignment(AMINOACIDS, 300, 50) + if err != nil { + t.Error(err) + + } + a2, _ := a.Clone() + a.AppendSeqIdentifier("\t \t", false) + a.AppendSeqIdentifier("\t \t", true) + + a.CleanNames() + i := 0 + a.IterateChar(func(name string, sequence []rune) { + expected, found := a2.GetSequenceNameById(i) + if !found { + t.Error("Unknown sequence name after clean") + } + if name != expected { + t.Error("Unexpected sequence name after clean") + } + i++ + }) +} + func TestRemoveOneGapSite(t *testing.T) { a, err := RandomAlignment(AMINOACIDS, 300, 300) if err != nil { diff --git a/test.sh b/test.sh index 9921d5f..ad319ad 100755 --- a/test.sh +++ b/test.sh @@ -166,20 +166,20 @@ rm -f expected result mapfile echo "->goalign reformat phylip spaces tabs" cat > expected < input.fa < S e q 0 0 00 GATTA -> Se q00 0 1 +> Se q00 0 1 ATTTG -> Se q00 0 2 +> Se q00 0 2 CCGTA ->Seq00 03 +> Seq00 03 GGCCA >Seq 0004 GAATC