forked from douglasgscofield/sparseMEM-big
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcreate_mummer_index.cpp
87 lines (67 loc) · 2.19 KB
/
create_mummer_index.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include "sparseSA.hpp"
#include "fasta.hpp"
#include <getopt.h>
#include <time.h>
#include <sys/time.h>
#include <cctype> // std::tolower(), uppercase/lowercase conversion
#include <boost/archive/binary_oarchive.hpp>
// NOTE use of special characters ~, `, and $ !!!!!!!!
using namespace std;
// Prints the command-line help to stderr and terminates the process via exit(1).
void usage(string prog);
// Output-format flag handed to the sparseSA constructor. Set by the -F option,
// and forced to true in main() when the reference holds more than one sequence.
// NOTE(review): identifiers starting with an underscore are reserved in the
// global namespace; consider renaming if this file is ever refactored.
bool _4column = false;
// Value of the -k option (defaults to 1); handed to the sparseSA constructor.
int K = 1;
// Index object allocated in main(), serialized to the output file, then deleted.
sparseSA *sa;
int main(int argc, char* argv[]) {
while (1) {
static struct option long_options[] = {
{"k", 1, 0, 0}, // 0
{"F", 0, 0, 0} // 1
};
int longindex = -1;
int c = getopt_long_only(argc, argv, "", long_options, &longindex);
if(c == -1) break; // Done parsing flags.
else if(c == '?') { // If the user entered junk, let him know.
cerr << "Invalid parameters." << endl;
usage(argv[0]);
}
else {
// Branch on long options.
switch(longindex) {
case 0: K = atoi(optarg); break;
case 1: _4column = true; break;
default: break;
}
}
}
if (argc - optind != 1) usage(argv[0]);
string ref_fasta = argv[optind];
string ref;
size_t pos = ref_fasta.find_last_of(".");
string output_name = ref_fasta.substr(0, pos) + ".mum";
vector<string> refdescr;
vector<long> startpos;
load_fasta(ref_fasta, ref, refdescr, startpos);
// Automatically use 4 column format if there are multiple reference sequences.
if(startpos.size() > 1) _4column = true;
sa = new sparseSA(ref, refdescr, startpos, _4column, K);
std::ofstream of(output_name.c_str(), std::ios::binary | std::ios::out);
boost::archive::binary_oarchive oa(of);
oa << *sa;
delete sa;
}
/**
 * Writes the command-line help text to stderr and terminates the process
 * with exit status 1. This function never returns.
 *
 * @param prog program name shown in the "Usage:" line (main passes argv[0]).
 */
void usage(string prog) {
    // Fixed help lines printed after the "Usage:" header, one per output line;
    // the empty entry produces the blank line before the example command.
    static const char* const help_lines[] = {
        "Additional options:",
        "-k sampled suffix positions (one by default)",
        "-F force 4 column output format regardless of the number of",
        " reference sequence inputs",
        "Example usage:",
        "",
        "./create_mummer_index ref.fa",
        "Create an index file for ref.fa"
    };
    const size_t line_count = sizeof(help_lines) / sizeof(help_lines[0]);

    cerr << "Usage: " << prog << " <reference-file>" << endl;
    for (size_t i = 0; i < line_count; ++i) {
        cerr << help_lines[i] << endl;
    }

    exit(1);
}