Skip to content

Commit

Permalink
Merge pull request #13 from jesvedberg/master
Browse files Browse the repository at this point in the history
Merged with Ancestry_HMM-S and prepared for distribution with Bioconda
  • Loading branch information
jesvedberg authored Jul 15, 2022
2 parents 97990d5 + 96d90d3 commit 502f2ed
Show file tree
Hide file tree
Showing 20 changed files with 3,240 additions and 7 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

27 changes: 21 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,35 @@

TCFLAGS = -ltcmalloc
ARMAFLAGS = -larmadillo
CONDAFLAGS = -fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem ${CONDA_PREFIX}/include

all:
$(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)
### if tcmalloc is installed, this can be linked using the command below instead
# $(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp $(TCFLAGS) $(ARMAFLAGS)
$(CXX) -O3 $(CXXFLAGS) -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)
$(CXX) -O3 $(CXXFLAGS) -o ahmm-s ahmms.cpp $(ARMAFLAGS)

conda:
$(CXX) -O3 $(CONDAFLAGS) -o ahmm-s ahmms.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)
$(CXX) -O3 $(CONDAFLAGS) -o ancestry_hmm ancestry_hmm.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)

ahmms:
$(CXX) -O3 $(CXXFLAGS) -o ahmm-s ahmms.cpp $(ARMAFLAGS)

ahmm:
$(CXX) -O3 $(CXXFLAGS) -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)


## if you have a local install of google perftools, please add a TCFlag link.
### $(LINK.cc) -std=c++11 -O3 -o ahmm-s ahmms.cpp $(ARMAFLAGS) $(TCFLAGS)

## if you have a local armadillo installation, you will need to provide its directory at compile time and possibly also link BLAS and LAPACK (-lblas -llapack)
## our recommendation is to use miniconda3 to do the installation
## $ conda install -c conda-forge armadillo
## then you will have the appropriate lib and include files in your home directory under subdirectory miniconda3/
## so, replace USERNAME with your unix id on the following line and try this

#$(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp -L /home/USERNAME/miniconda3/lib/ -I /home/USERNAME/miniconda3/include/ $(ARMAFLAGS)
## $(CXX) -std=c++11 -O3 -o ahmm-s ahmms.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)

## if it builds correctly, you will also need to link the library during runtime
## if it builds correctly, you may also need to link the library during runtime
## to do this, add the following line to your ~/.bash_profile or ~/.bashrc
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/USERNAME/miniconda3/lib/
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${CONDA_PREFIX}/lib

215 changes: 215 additions & 0 deletions src/ahmms.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
copyright: Russ Corbett-Detig
rucorbet@ucsc.edu
Jesper Svedberg
jsvedber@ucsc.edu
This is software distributed under the gnu public license version 3.
*/

/// headers
#include <iostream>
#include <vector>
#include <map>
#include <time.h>
#include <string>
#include <fstream>
#include <algorithm>

// Includes specific for Ancestry_HMM-S
#include <cmath>
#include <cstring>
#include <utility>
#include <iomanip>
#include <cstdlib>
#include <random> // ++++ REQUIRES C++11 ++++


/// linear algebra library is armadillo
#define ARMA_NO_DEBUG
#include <armadillo>
using namespace arma ;
using namespace std ;

/// our header files in /src directory
#include "selection_print_usage.h" // JS
#include "factorial.h"
#include "nchoosek.h"
#include "selection_subsample.h"
#include "multichoose.h"
#include "multipermute.h"
#include "normalize.h"
#include "ancestry_pulse.h"
#include "ploidy_path.h"
#include "selection_class.h" // JS
#include "selection_markov_chain.h"
#include "read_samples.h"
#include "pulses_to_ancestry.h"
#include "compute_forward.h"
#include "compute_backward.h"
#include "forward_backward.h"
#include "viterbi.h"
#include "transition_information.h"
#include "exponentiate_matrix.h"
#include "selection_cmd_line.h"
#include "create_transition_rates.h"
#include "selection_read_cmd_line.h"
#include "evaluate_vertex.h"
#include "check_vertex.h"
#include "sort_vertices.h"
#include "create_pulses.h"
#include "create_states.h"
#include "input_line.h"
#include "distribute_alleles.h"
#include "binomial.h"
#include "read_emissions.h"
#include "genotype_emissions.h"
#include "selection_read_input.h"
#include "nelder_mead.h"
#include "golden_search.h"
#include "bootstrap.h"

// Includes specific for Ancestry_HMM-S
#include "selection_get_position.h"
#include "selection_optimize_test_func.h" // Function for testing Nelder-Mead. Remove?
#include "selection_fwd_iter.h"
#include "selection_trajectory.h"
#include "selection_split_vector.h"
#include "selection_forward.h"
#include "selection_stochastic_traj.h"
#include "selection_transition_rates.h"




/// Entry point for Ancestry_HMM-S (ahmm-s).
///
/// Parses the command line, reads the sample list and the panel data, builds
/// the per-ploidy state list and transition lookup tables, and then runs the
/// first analysis mode enabled in `options`, checked in this order:
///   1. grid search                (options.calc_grid)
///   2. single-site evaluation     (options.test_point)
///   3. golden section search      (options.run_gss)
/// Progress messages and step timings are written to stderr; the single-site
/// result is written to stdout.
///
/// Returns 0 after the selected mode finishes (or when no mode is enabled).
/// Exits with status 1 when the site requested for single-site evaluation
/// cannot be mapped to a loaded site.
int main ( int argc, char *argv[] ) {

    /// time tracking
    clock_t t = clock() ;

    /// clock() counts in units of 1/CLOCKS_PER_SEC, not milliseconds, so the
    /// difference has to be scaled before it is printed with an "ms" label
    auto elapsed_ms = [] ( clock_t since ) {
        return 1000.0 * static_cast<double>( clock() - since ) / CLOCKS_PER_SEC ;
    } ;

    /// seed prng
    srand ( static_cast<unsigned int>( t ) ) ;

    // read cmd line
    cmd_line options ;
    cerr << "reading command line" ; t = clock();
    options.read_cmd_line( argc, argv ) ;

    /// chain objects for each sample
    vector<markov_chain> markov_chain_information ;

    /// get sample ids and ploidy from input file
    cerr << "\t\t\t\t" << elapsed_ms( t ) << " ms\n" << "reading sample ids and ploidy" ; t = clock();
    read_samples( markov_chain_information, options.sample_file, options.viterbi ) ;

    /// create states matrix
    cerr << "\t\t\t" << elapsed_ms( t ) << " ms\n" << "creating states matrix" ; t = clock();
    /// store all possible state space arranged by ploidy and then vector of state counts
    map<int,vector<vector<int> > > state_list ;
    /// now create initial state list
    for ( size_t m = 0 ; m < markov_chain_information.size() ; m ++ ) {
        for ( size_t p = 0 ; p < markov_chain_information[m].sample_ploidy_path.size() ; p ++ ) {
            create_initial_states( markov_chain_information.at(m).sample_ploidy_path[p].ploidy, options.ancestry_pulses, state_list ) ;
        }
    }

    /// read in panels and update matrices
    cerr << "\t\t\t\t" << elapsed_ms( t ) << " ms\n" << "reading data and creating emissions matrices\t" ; t = clock() ;
    /// store recombination rates and positions
    vector<int> position ;
    vector<double> recombination_rate ;
    vector<string> chromosomes ;
    /// initialised so that it never holds an indeterminate value if read_file leaves it untouched
    int sel_pos = 0 ;
    read_file( options, markov_chain_information, state_list, position, recombination_rate, chromosomes, sel_pos ) ;

    /// create basic transition information
    cerr << elapsed_ms( t ) << " ms" << endl << "computing transition routes\t\t\t" ; t = clock() ;
    /// 3d map to look up by ploidy, start state, end state, and then relevant transition information
    map<int, vector<vector< map< vector<transition_information>, double > > > > transition_matrix_information ;
    for ( size_t m = 0 ; m < markov_chain_information.size() ; m ++ ) {
        for ( size_t p = 0 ; p < markov_chain_information[m].sample_ploidy_path.size() ; p ++ ) {
            create_transition_information( markov_chain_information.at(m).sample_ploidy_path[p].ploidy, transition_matrix_information, state_list[markov_chain_information.at(m).sample_ploidy_path[p].ploidy] ) ;
        }
    }
    cerr << endl;


    // Below are ahmm-s specific options

    // If using grid search with --grid flag
    if (options.calc_grid == true) {
        int p_start = options.grid_pstart;
        int p_stop = options.grid_pstop;
        int p_step = options.grid_pstep;

        double s_start = options.grid_sstart;
        double s_stop = options.grid_sstop;

        // Optionally replace the upper bound with the value computed by selection_get_max_sel.
        // NOTE(review): indexes ancestry_pulses[1] unchecked -- confirm that the
        // command line parser guarantees at least two pulses.
        if ( options.limit_sel_space == true ) {
            s_stop = selection_get_max_sel(options.grid_sstart, options.grid_sstop, options.grid_sstep, options.ancestry_pulses[1].proportion, options.ancestry_pulses[1].time, options.ne);
        }
        double s_step = options.grid_sstep;

        cerr << "Grid search. Likelihood calculated for values of selection between " << s_start << " and " << s_stop << endl;

        selection_grid(p_start, p_stop, p_step, s_start, s_stop, s_step, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list);
        return 0;
    }


    // If testing a single point using --site flag.
    if (options.test_point == true) {
        cerr << "Evaluating point: " << options.test_pos << ", " << options.test_sel << endl;

        map <double,vector<double> > sel_trajectories;
        vector <vector<double> > split_vecs;
        int testpos;
        selection point0;

        if (options.is_coord == true) {
            // test_pos is a chromosomal coordinate: translate it to an index into `position`
            testpos = get_position(options.test_pos, position);

            if (testpos == -1) {
                cerr << "ERROR: specified site not found on chromosome" << endl;
                exit(1);
            }
        }
        else {
            // test_pos is already an index into `position`
            testpos = options.test_pos;
        }

        // The index is used below to subscript `position`; reject anything
        // outside the loaded sites instead of reading out of bounds.
        if ( testpos < 0 || static_cast<size_t>( testpos ) >= position.size() ) {
            cerr << "ERROR: specified site index is outside the range of loaded sites" << endl;
            exit(1);
        }

        // baseline: same site evaluated with sel = 0
        point0.pos = testpos;
        point0.sel = 0;
        selection_evaluate_point_genotypes( point0, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list, split_vecs, sel_trajectories ) ;

        // same site evaluated with the requested selection value
        selection point1;
        point1.pos = testpos;
        point1.sel = options.test_sel;
        selection_evaluate_point_genotypes( point1, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list, split_vecs, sel_trajectories ) ;

        // the reported value is the difference between the two lnl results
        cout << "lnL for a selected site s=" << options.test_sel << " at position " << position[point0.pos] << " is: " << point1.lnl-point0.lnl << endl;

        return 0;
    }


    // If using Golden Section Search with --gss flag
    if (options.run_gss == true) {
        selection_golden_section(markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list);
        return 0;
    }


    return 0 ;
}


2 changes: 1 addition & 1 deletion src/read_cmd_line.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void cmd_line::read_cmd_line ( int argc, char *argv[] ) {

if ( strcmp(argv[i],"--help") == 0 ) {
print_usage() ;
exit(1) ;
exit(0) ;
}

if ( strcmp(argv[i],"-g") == 0 ) {
Expand Down
21 changes: 21 additions & 0 deletions src/selection_class.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef __SELECTION_CLASS_H
#define __SELECTION_CLASS_H

/// One evaluated site: where it is, the selection value tested there, and the
/// resulting lnl. All members are zero-initialised so that a freshly declared
/// object never exposes indeterminate values.
class selection {
public:
    /// index of the site (main() uses it to subscript the positions vector)
    int pos = 0;
    /// selection value evaluated at this site
    double sel = 0.0;
    /// value printed as "lnL" for this site and selection value
    double lnl = 0.0;

    /// order points by lnl, smallest first
    friend bool operator < ( const selection &a, const selection &b ) {
        return a.lnl < b.lnl ;
    }
} ;

ostream& operator<< (ostream &out, selection const& point) {
out << "Selection point. pos:" << point.pos << " sel:" << setprecision(15) << point.sel << " lnL: " << point.lnl;
return out;
}

#endif
Loading

0 comments on commit 502f2ed

Please sign in to comment.