Skip to content

Commit

Permalink
Added --stable-rogues to shuffle sites
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Oct 13, 2017
1 parent 8f4921d commit a643d70
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 5 deletions.
19 changes: 15 additions & 4 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ type Alignment interface {
Length() int
Mutate(rate float64) // Adds uniform substitutions in the alignment (~sequencing errors)
ShuffleSequences()
ShuffleSites(rate float64, roguerate float64) []string
ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string
SimulateRogue(prop float64, proplen float64) ([]string, []string)
Sort() // Sorts the alignment by sequence name
RemoveGapSites(cutoff float64) // Removes sites having >= cutoff gaps
Expand Down Expand Up @@ -255,8 +255,13 @@ func (a *align) ShuffleSequences() {
// rate must be >=0 and <=1
// Then, take roguerate proportion of the taxa, and will shuffle rate sites among the
// remaining intact sites
// randroguefirst: If true, then with a given seed, rogues will always be the same with all alignments
// having sequences in the same order. It may not be the case if false, especially when alignments
// have different lengths.
// Output: List of tax names that are more shuffled than others (length=roguerate*nbsequences)
func (a *align) ShuffleSites(rate float64, roguerate float64) []string {
func (a *align) ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string {
var sitepermutation, taxpermutation []int

if rate < 0 || rate > 1 {
io.ExitWithMessage(errors.New("Shuffle site rate must be >=0 and <=1"))
}
Expand All @@ -267,8 +272,14 @@ func (a *align) ShuffleSites(rate float64, roguerate float64) []string {
nb_sites_to_shuffle := int(rate * float64(a.Length()))
nb_rogue_sites_to_shuffle := int(rate * (1.0 - rate) * (float64(a.Length())))
nb_rogue_seq_to_shuffle := int(roguerate * float64(a.NbSequences()))
sitepermutation := rand.Perm(a.Length())
taxpermutation := rand.Perm(a.NbSequences())
if randroguefirst {
taxpermutation = rand.Perm(a.NbSequences())
sitepermutation = rand.Perm(a.Length())
} else {
sitepermutation = rand.Perm(a.Length())
taxpermutation = rand.Perm(a.NbSequences())
}

rogues := make([]string, nb_rogue_seq_to_shuffle)

if (nb_rogue_sites_to_shuffle + nb_sites_to_shuffle) > a.Length() {
Expand Down
4 changes: 3 additions & 1 deletion cmd/sites.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
var siteRate float64
var siteRogue float64
var siteRogueNameFile string
var stableRogues bool

// sitesCmd represents the sites command
var sitesCmd = &cobra.Command{
Expand Down Expand Up @@ -34,7 +35,7 @@ goalign shuffle sites -i align.fasta -r 0.5
f := openWriteFile(shuffleOutput)
nameFile := openWriteFile(siteRogueNameFile)
for al := range rootaligns {
names := al.ShuffleSites(siteRate, siteRogue)
names := al.ShuffleSites(siteRate, siteRogue, stableRogues)
writeAlign(al, f)
for _, n := range names {
nameFile.WriteString(n)
Expand All @@ -51,4 +52,5 @@ func init() {
sitesCmd.PersistentFlags().Float64VarP(&siteRate, "rate", "r", 0.5, "Rate of shuffled sites (>=0 and <=1)")
sitesCmd.PersistentFlags().Float64Var(&siteRogue, "rogue", 0.0, "If set, then will take the given proportion of taxa, and will apply shuffle again on --rate of the remaining intact sites")
sitesCmd.PersistentFlags().StringVar(&siteRogueNameFile, "rogue-file", "stdout", "Rogue sequence names output file")
sitesCmd.PersistentFlags().BoolVar(&stableRogues, "stable-rogues", false, "If true, then with a given seed, rogues will always be the same with all alignments having sequences in the same order. It may not be the case if false, especially when alignemnts have different lengths.")
}
40 changes: 40 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,46 @@ diff result expected
diff rogueexpected rogues.txt
rm -f expected result rogueexpected rogues.txt

# Test case: "goalign shuffle sites --stable-rogues".
# With a fixed shuffle seed, the rogue taxa must be identical for alignments
# whose sequences are in the same order, even when the alignments differ in
# length and were generated with a different random seed.
echo "->goalign shuffle sites rogues stables"

# Expected shuffled alignment for the first (50-site) input generated below.
cat > expected <<EOF
>Seq0000
CATTTATTTGCGGACGTCGTGAAAGTGTAGTTCAAACACATCAAACTGGC
>Seq0001
CCTTATACCGGAGGATACAAATGGAATAATATTTAGATCTTCAGAAAGCC
>Seq0002
AGTCGCAGCTAAGTATGGGTGATCTCTCTGAATACTATCCGAGTCAAGGG
>Seq0003
CTATGTTTCCGCTGTCGGGGCTCGAGGTCTGTAGAAGGTTTGGCGATAGG
>Seq0004
ATGAGCGCGGGGTAAATGCCCTTTAGACGAAGTCAGCGATTCACCGTTTA
>Seq0005
GCGAGGATCCCCCTCGAAAAATATTAAGGATGTTATCACTATTGTTCATC
>Seq0006
GAATTGAAGCTGTTACGCTATACCGTGGTGGGAATAGAGGGTGACACCCT
>Seq0007
ATAGTAAGCAGTAAGGCTCGCGAATGCCTCAGGTACACTGCTGTCTTCGC
>Seq0008
CACAAGCCTTGCGTGGATCAGCCGGAGCACGGCTACTGCCACTGATCTCC
>Seq0009
TTTACTACTCCACGGCTACACGGACCTAGATGCTTGATTACTAGGTCTGG
EOF

# Expected rogue sequence names, in the order they are written to the rogue file.
cat > rogueexpected <<EOF
Seq0001
Seq0002
Seq0008
Seq0004
Seq0006
EOF

# Both runs use the same shuffle seed (-s 10) and --stable-rogues; only the
# input alignment differs (length 50 / seed 10 versus length 30 / seed 11).
# NOTE(review): the second run relies on "goalign random" producing the same
# sequence names in the same order as the first -- confirm against its defaults.
goalign random -l 50 -s 10 | goalign shuffle sites -r 0.5 -s 10 --rogue 0.5 --rogue-file rogues.txt --stable-rogues > result
goalign random -l 30 -s 11 | goalign shuffle sites -r 0.5 -s 10 --rogue 0.5 --rogue-file rogues2.txt --stable-rogues > /dev/null
diff result expected
diff rogueexpected rogues.txt
# Should be the same list of rogues, even if random gen seed is
# different and length is different (initial seq order is the same)
diff rogues.txt rogues2.txt
# Remove every file this test created, including the second rogue list.
rm -f expected result rogueexpected rogues.txt rogues2.txt

echo "->goalign stats"
cat > expected <<EOF
Expand Down

0 comments on commit a643d70

Please sign in to comment.