Skip to content

Commit

Permalink
Added option --frac/-f to goalign seqboot/distboot #8
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Jan 6, 2021
1 parent da9722e commit d3aa112
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 21 deletions.
23 changes: 16 additions & 7 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type Alignment interface {
AddGaps(rate, lenprop float64)
Append(Alignment) error // Appends alignment sequences to this alignment
AvgAllelesPerSite() float64
BuildBootstrap() Alignment // Bootstrap alignment
BuildBootstrap(frac float64) Alignment // Bootstrap alignment
CharStatsSite(site int) (map[uint8]int, error)
Clone() (Alignment, error)
CodonAlign(ntseqs SeqBag) (codonAl *align, err error)
Expand Down Expand Up @@ -878,17 +878,26 @@ func (a *align) Rarefy(nb int, counts map[string]int) (al Alignment, err error)
return
}

// This function builds a bootstrap alignment
// and returns it with "indices", an array containing
// the index (in the original alignment) of all bootstrap sites.
func (a *align) BuildBootstrap() (boot Alignment) {
n := a.Length()
// BuildBootstrap builds a bootstrap alignment.
// If frac is < 1.0, it is a partial bootstrap as in phylip seqboot,
// which means that the sites are sampled from the full alignment with
// replacement, but the output alignment length is a fraction of the
// original alignment.
// If frac is <= 0 or > 1, it is reset to 1.0 (full-length bootstrap).
// (see https://evolution.genetics.washington.edu/phylip/doc/seqboot.html)
func (a *align) BuildBootstrap(frac float64) (boot Alignment) {
if frac <= 0 || frac > 1 {
frac = 1.0
}

alength := a.Length()
n := int(frac * float64(alength))

boot = NewAlign(a.alphabet)
indices := make([]int, n)
var buf []uint8

for i := 0; i < n; i++ {
indices[i] = rand.Intn(n)
indices[i] = rand.Intn(alength)
}

for _, seq := range a.seqs {
Expand Down
28 changes: 16 additions & 12 deletions cmd/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ var bootstrapoutprefix string
var bootstrapOrder bool
var bootstraptar bool
var bootstrapgz bool
var bootstrapfrac float64
var bootstrappartitionstr string
var bootstrapoutputpartitionstr string

Expand All @@ -42,6 +43,10 @@ The input may be a Phylip or Fasta file.
- It is possible to give a initial seed (--seed). In this case several runs of
the tool will give the exact same results.
- If frac is < 1.0, output alignments are partial bootstrap alignments as is phylip
seqboot, which means that the sites are sampled from the full alignment with
replacement, but the bootstrap alignment length is a fraction of the original alignment.
Example of usage:
goalign build seqboot -i align.phylip -p -n 500 -o boot --tar-gz
Expand Down Expand Up @@ -127,7 +132,7 @@ goalign build seqboot -i align.phylip -p -n 500 -o boot_
// several partitions. We generate bootstrap replicates
// for each partition, and then concatenate them all.
for _, a := range aligns {
tmpboot = a.BuildBootstrap()
tmpboot = a.BuildBootstrap(bootstrapfrac)
if boot == nil {
boot = tmpboot
} else {
Expand Down Expand Up @@ -187,21 +192,19 @@ func writenewfile(name string, gz bool, bootstring string) (err error) {
if gz {
if f, err = os.Create(name + ".gz"); err != nil {
return
} else {
gw := gzip.NewWriter(f)
buf := bufio.NewWriter(gw)
buf.WriteString(bootstring)
buf.Flush()
gw.Close()
f.Close()
}
gw := gzip.NewWriter(f)
buf := bufio.NewWriter(gw)
buf.WriteString(bootstring)
buf.Flush()
gw.Close()
f.Close()
} else {
if f, err = os.Create(name); err != nil {
return
} else {
f.WriteString(bootstring)
f.Close()
}
f.WriteString(bootstring)
f.Close()
}
return
}
Expand All @@ -227,7 +230,7 @@ func addstringtotargz(tw *tar.Writer, name string, align string) error {
return nil
}

func min_int(a, b int) int {
func minInt(a, b int) int {
if a < b {
return a
}
Expand All @@ -241,6 +244,7 @@ func init() {
seqbootCmd.PersistentFlags().BoolVar(&bootstraptar, "tar", false, "Will create a single tar file with all bootstrap alignments (one thread for tar, but not a bottleneck)")
seqbootCmd.PersistentFlags().BoolVar(&bootstrapgz, "gz", false, "Will gzip output file(s). Maybe slow if combined with --tar (only one thread working for tar/gz)")
seqbootCmd.PersistentFlags().IntVarP(&bootstrapNb, "nboot", "n", 1, "Number of bootstrap replicates to build")
seqbootCmd.PersistentFlags().Float64VarP(&bootstrapfrac, "frac", "f", 1.0, "Fraction of sites to sample (if < 1.0: Partial bootstrap as in phylip seqboot)")
seqbootCmd.PersistentFlags().StringVar(&bootstrappartitionstr, "partition", "none", "File containing definition of the partitions")
seqbootCmd.PersistentFlags().StringVar(&bootstrapoutputpartitionstr, "out-partition", "", "File containing output partitions (default: same name as input partition with _boot suffix)")
seqbootCmd.PersistentFlags().StringVarP(&bootstrapoutprefix, "out-prefix", "o", "none", "Prefix of output bootstrap files")
Expand Down
12 changes: 10 additions & 2 deletions cmd/distboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ var distbootAlpha float64
var distbootmodel string
var distbootcontinuous bool = false
var distbootRemoveGaps bool
var distbootFrac float64

// distbootCmd represents the distboot command
var distbootCmd = &cobra.Command{
Expand Down Expand Up @@ -47,6 +48,12 @@ Proteins:
For example:
goalign build distboot -m k2p -i align.fa -o mats.txt
If frac is < 1.0, bootstrap alignment used for computing distances is a partial
bootstrap as is phylip seqboot, which means that the sites are sampled from the
full alignment with replacement, but the bootstrap alignment length is a fraction of the
original alignment.
`,
//If -c is given, then random continuous weights are associated to all sites.
//Weights follow a Dirichlet distribution D(n;1,...,1)
Expand Down Expand Up @@ -89,7 +96,7 @@ goalign build distboot -m k2p -i align.fa -o mats.txt
}
writeDenseDistBootMatrix(d, align, f)
} else {
boot := align.BuildBootstrap()
boot := align.BuildBootstrap(distbootFrac)
if _, _, d, err = protmodel.MLDist(boot, nil); err != nil {
io.LogError(err)
return
Expand All @@ -112,7 +119,7 @@ goalign build distboot -m k2p -i align.fa -o mats.txt
return
}
} else {
boot := align.BuildBootstrap()
boot := align.BuildBootstrap(distbootFrac)
if distMatrix, err = dna.DistMatrix(boot, nil, dnamodel, -1, -1, -1, -1, cmd.Flags().Changed("alpha"), distbootAlpha, rootcpus); err != nil {
io.LogError(err)
return
Expand All @@ -130,6 +137,7 @@ func init() {
distbootCmd.PersistentFlags().StringVarP(&distbootOutput, "output", "o", "stdout", "Distance matrices output file")
distbootCmd.PersistentFlags().StringVarP(&distbootmodel, "model", "m", "k2p", "Model for distance computation")
distbootCmd.PersistentFlags().IntVarP(&distbootnb, "nboot", "n", 1, "Number of bootstrap replicates to build")
distbootCmd.PersistentFlags().Float64VarP(&distbootFrac, "frac", "f", 1.0, "Fraction of sites to sample (if < 1.0: Partial bootstrap as in phylip seqboot)")
//distbootCmd.PersistentFlags().BoolVarP(&distbootcontinuous, "continuous", "c", false, "Bootstraps are done by weighting alignment with continuous weights (dirichlet)")
distbootCmd.PersistentFlags().BoolVarP(&distbootRemoveGaps, "rm-gaps", "r", false, "Do not take into account positions containing >=1 gaps")
distbootCmd.PersistentFlags().Float64Var(&distbootAlpha, "alpha", 0.0, "Gamma alpha parameter, if not given : no gamma")
Expand Down
2 changes: 2 additions & 0 deletions docs/commands/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ This command builds bootstrap replicates from an input alignment (fasta or phyli
- f84 : Felsenstein 84
- tn93 : Tamura and Nei 1993

If the --frac/-f option is < 1.0, then bootstrap alignments (or the ones used for computing distances) are partial bootstraps as in phylip seqboot. This means that the sites are sampled from the full alignment with replacement, but the bootstrap alignment length is a fraction of the original alignment.

#### Usage

* General command
Expand Down

0 comments on commit d3aa112

Please sign in to comment.