Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged with Ancestry_HMM-S and prepared for distribution with Bioconda #13

Merged
merged 2 commits into from
Jul 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

27 changes: 21 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,35 @@

# Link flag for tcmalloc; in the lines shown here it is only referenced from commented-out recipes.
TCFLAGS = -ltcmalloc
# Link flag for the Armadillo linear algebra library, appended to every compile command below.
ARMAFLAGS = -larmadillo
# Compiler flags used only by the `conda` target; system headers are searched under ${CONDA_PREFIX}/include.
CONDAFLAGS = -fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem ${CONDA_PREFIX}/include

# all: compiles both executables, ancestry_hmm (from ancestry_hmm.cpp) and ahmm-s (from ahmms.cpp).
all:
$(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)
### if tcmalloc is installed, this can be linked using the command below instead
# $(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp $(TCFLAGS) $(ARMAFLAGS)
$(CXX) -O3 $(CXXFLAGS) -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)
$(CXX) -O3 $(CXXFLAGS) -o ahmm-s ahmms.cpp $(ARMAFLAGS)

# conda: compiles both executables with CONDAFLAGS, taking libraries and headers from ${CONDA_PREFIX}.
conda:
$(CXX) -O3 $(CONDAFLAGS) -o ahmm-s ahmms.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)
$(CXX) -O3 $(CONDAFLAGS) -o ancestry_hmm ancestry_hmm.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)

# ahmms: compiles only the ahmm-s executable.
ahmms:
$(CXX) -O3 $(CXXFLAGS) -o ahmm-s ahmms.cpp $(ARMAFLAGS)

# ahmm: compiles only the ancestry_hmm executable.
ahmm:
$(CXX) -O3 $(CXXFLAGS) -o ancestry_hmm ancestry_hmm.cpp $(ARMAFLAGS)


## if you have a local install of google perftools, please add a TCFlag link.
### $(LINK.cc) -std=c++11 -O3 -o ahmm-s ahmms.cpp $(ARMAFLAGS) $(TCFLAGS)

## if you have a local armadillo installation, you will need to provide its directory at compile time and possibly also link -lblas and -llapack
## our recommendation is to use miniconda3 to do the installation
## $ conda install -c conda-forge armadillo
## then you will have the appropriate lib and include files in your home directory under subdirectory miniconda3/
## so, replace USERNAME with your unix id on the following line and try this

#$(LINK.cc) -O3 -o ancestry_hmm ancestry_hmm.cpp -L /home/USERNAME/miniconda3/lib/ -I /home/USERNAME/miniconda3/include/ $(ARMAFLAGS)
## $(CXX) -std=c++11 -O3 -o ahmm-s ahmms.cpp -L ${CONDA_PREFIX}/lib -I ${CONDA_PREFIX}/include $(ARMAFLAGS)

## if it builds correctly, you will also need to link the library during runtime
## if it builds correctly, you may also need to link the library during runtime
## to do this, add the following line to your ~/.bash_profile or ~/.bashrc
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/USERNAME/miniconda3/lib/
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${CONDA_PREFIX}/lib

215 changes: 215 additions & 0 deletions src/ahmms.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*

copyright: Russ Corbett-Detig
rucorbet@ucsc.edu

Jesper Svedberg
jsvedber@ucsc.edu

This is software distributed under the gnu public license version 3.

*/

/// headers
#include <iostream>
#include <vector>
#include <map>
#include <time.h>
#include <string>
#include <fstream>
#include <algorithm>

// Includes specific for Ancestry_HMM-S
#include <cmath>
#include <cstring>
#include <utility>
#include <iomanip>
#include <cstdlib>
#include <random> // ++++ REQUIRES C++11 ++++


/// linear algebra library is armadillo
#define ARMA_NO_DEBUG
#include <armadillo>
using namespace arma ;
using namespace std ;

/// our header files in /src directory
#include "selection_print_usage.h" // JS
#include "factorial.h"
#include "nchoosek.h"
#include "selection_subsample.h"
#include "multichoose.h"
#include "multipermute.h"
#include "normalize.h"
#include "ancestry_pulse.h"
#include "ploidy_path.h"
#include "selection_class.h" // JS
#include "selection_markov_chain.h"
#include "read_samples.h"
#include "pulses_to_ancestry.h"
#include "compute_forward.h"
#include "compute_backward.h"
#include "forward_backward.h"
#include "viterbi.h"
#include "transition_information.h"
#include "exponentiate_matrix.h"
#include "selection_cmd_line.h"
#include "create_transition_rates.h"
#include "selection_read_cmd_line.h"
#include "evaluate_vertex.h"
#include "check_vertex.h"
#include "sort_vertices.h"
#include "create_pulses.h"
#include "create_states.h"
#include "input_line.h"
#include "distribute_alleles.h"
#include "binomial.h"
#include "read_emissions.h"
#include "genotype_emissions.h"
#include "selection_read_input.h"
#include "nelder_mead.h"
#include "golden_search.h"
#include "bootstrap.h"

// Includes specific for Ancestry_HMM-S
#include "selection_get_position.h"
#include "selection_optimize_test_func.h" // Function for testing Nelder-Mead. Remove?
#include "selection_fwd_iter.h"
#include "selection_trajectory.h"
#include "selection_split_vector.h"
#include "selection_forward.h"
#include "selection_stochastic_traj.h"
#include "selection_transition_rates.h"




/// Entry point for ahmm-s.
///
/// Reads the command line, the sample list and the input panel, builds the
/// state list and the transition lookup tables, and then runs at most one of
/// three analyses, chosen by flags in `options` (checked in this order):
///   - options.calc_grid  : likelihood grid via selection_grid()
///   - options.test_point : evaluates one site at s = 0 and at s = options.test_sel
///                          and prints the difference of the two lnl values
///   - options.run_gss    : golden section search via selection_golden_section()
/// If none of the flags is set, nothing further is done.
///
/// Progress messages and per-stage timings are written to stderr.
///
/// @return 0 on success; 1 if a site requested by coordinate is not found.
int main ( int argc, char *argv[] ) {

    /// time tracking: t holds the start of the stage currently being timed
    clock_t t = clock() ;

    /// clock() counts in units of 1/CLOCKS_PER_SEC, so the difference has to
    /// be converted before it is printed with an "ms" label
    auto elapsed_ms = [&t] () {
        return 1000.0 * static_cast<double>( clock() - t ) / CLOCKS_PER_SEC ;
    } ;

    /// seed prng
    /// NOTE(review): the seed is the processor time consumed so far, which is
    /// likely to vary very little between runs -- confirm this is intended
    srand (t) ;

    // read cmd line
    cmd_line options ;
    cerr << "reading command line" ; t = clock() ;
    options.read_cmd_line( argc, argv ) ;

    /// chain objects for each sample
    vector<markov_chain> markov_chain_information ;

    /// get sample ids and ploidy from input file
    cerr << "\t\t\t\t" << elapsed_ms() << " ms\n" << "reading sample ids and ploidy" ; t = clock() ;
    read_samples( markov_chain_information, options.sample_file, options.viterbi ) ;

    /// create states matrix
    cerr << "\t\t\t" << elapsed_ms() << " ms\n" << "creating states matrix" ; t = clock() ;
    /// store all possible state space arranged by ploidy and then vector of state counts
    map<int,vector<vector<int> > > state_list ;
    /// now create initial state list
    for ( size_t m = 0 ; m < markov_chain_information.size() ; ++ m ) {
        for ( size_t p = 0 ; p < markov_chain_information[m].sample_ploidy_path.size() ; ++ p ) {
            create_initial_states( markov_chain_information.at(m).sample_ploidy_path[p].ploidy, options.ancestry_pulses, state_list ) ;
        }
    }

    /// read in panels and update matrices
    cerr << "\t\t\t\t" << elapsed_ms() << " ms\n" << "reading data and creating emissions matrices\t" ; t = clock() ;
    /// store recombination rates and positions
    vector<int> position ;
    vector<double> recombination_rate ;
    vector<string> chromosomes ;
    /// initialised so that it never carries an indeterminate value into read_file
    int sel_pos = 0 ;
    read_file( options, markov_chain_information, state_list, position, recombination_rate, chromosomes, sel_pos ) ;

    /// create basic transition information
    cerr << elapsed_ms() << " ms" << endl << "computing transition routes\t\t\t" ; t = clock() ;
    /// 3d map to look up by ploidy, start state, end state, and then relevant transition information
    map<int, vector<vector< map< vector<transition_information>, double > > > > transition_matrix_information ;
    for ( size_t m = 0 ; m < markov_chain_information.size() ; ++ m ) {
        for ( size_t p = 0 ; p < markov_chain_information[m].sample_ploidy_path.size() ; ++ p ) {
            create_transition_information( markov_chain_information.at(m).sample_ploidy_path[p].ploidy, transition_matrix_information, state_list[markov_chain_information.at(m).sample_ploidy_path[p].ploidy] ) ;
        }
    }
    cerr << endl ;

    // Below are ahmm-s specific options

    // If using grid search with --grid flag
    if ( options.calc_grid == true ) {
        int p_start = options.grid_pstart ;
        int p_stop = options.grid_pstop ;
        int p_step = options.grid_pstep ;

        double s_start = options.grid_sstart ;
        double s_stop = options.grid_sstop ;
        double s_step = options.grid_sstep ;

        // With limit_sel_space set, the upper bound of the selection range is
        // replaced by the value returned from selection_get_max_sel().
        // NOTE(review): ancestry_pulses[1] is indexed without a size check
        // visible here -- confirm at least two pulses are guaranteed.
        if ( options.limit_sel_space == true ) {
            s_stop = selection_get_max_sel( options.grid_sstart, options.grid_sstop, options.grid_sstep, options.ancestry_pulses[1].proportion, options.ancestry_pulses[1].time, options.ne ) ;
        }

        cerr << "Grid search. Likelihood calculated for values of selection between " << s_start << " and " << s_stop << endl ;

        selection_grid( p_start, p_stop, p_step, s_start, s_stop, s_step, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list ) ;
        return 0 ;
    }

    // If testing a single point using --site flag.
    if ( options.test_point == true ) {
        cerr << "Evaluating point: " << options.test_pos << ", " << options.test_sel << endl ;

        map <double,vector<double> > sel_trajectories ;
        vector <vector<double> > split_vecs ;
        int testpos ;

        if ( options.is_coord == true ) {
            // test_pos was given as a coordinate: look it up in the position vector
            testpos = get_position( options.test_pos, position ) ;

            if ( testpos == -1 ) {
                cerr << "ERROR: specified site not found on chromosome" << endl ;
                // return rather than exit() so that locals are destroyed normally
                return 1 ;
            }
        }
        else {
            // test_pos is used directly as the site index
            testpos = options.test_pos ;
        }

        // same site evaluated with sel = 0 ...
        selection point0 ;
        point0.pos = testpos ;
        point0.sel = 0 ;
        selection_evaluate_point_genotypes( point0, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list, split_vecs, sel_trajectories ) ;

        // ... and with the requested selection coefficient
        selection point1 ;
        point1.pos = testpos ;
        point1.sel = options.test_sel ;
        selection_evaluate_point_genotypes( point1, markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list, split_vecs, sel_trajectories ) ;

        // the value reported is the difference point1.lnl - point0.lnl
        cout << "lnL for a selected site s=" << options.test_sel << " at position " << position[point0.pos] << " is: " << point1.lnl-point0.lnl << endl ;

        return 0 ;
    }

    // If using Golden Section Search with --gss flag
    if ( options.run_gss == true ) {
        selection_golden_section( markov_chain_information, transition_matrix_information, recombination_rate, position, options, state_list ) ;
        return 0 ;
    }

    return 0 ;
}


2 changes: 1 addition & 1 deletion src/read_cmd_line.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void cmd_line::read_cmd_line ( int argc, char *argv[] ) {

if ( strcmp(argv[i],"--help") == 0 ) {
print_usage() ;
exit(1) ;
exit(0) ;
}

if ( strcmp(argv[i],"-g") == 0 ) {
Expand Down
21 changes: 21 additions & 0 deletions src/selection_class.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef __SELECTION_CLASS_H
#define __SELECTION_CLASS_H

/// One candidate selected site: its position, a selection coefficient,
/// and the lnl value stored for that combination.
class selection {
public:
    int pos ;       /// site position
    double sel ;    /// selection coefficient
    double lnl ;    /// lnl value stored for this point

    /// orders points by lnl: lhs comes first when its lnl is the smaller one
    friend bool operator < ( const selection &lhs, const selection &rhs ) {
        return rhs.lnl > lhs.lnl ;
    }
} ;

ostream& operator<< (ostream &out, selection const& point) {
out << "Selection point. pos:" << point.pos << " sel:" << setprecision(15) << point.sel << " lnL: " << point.lnl;
return out;
}

#endif
Loading