Skip to content

Commit

Permalink
Merge pull request #551 from clara-parabricks/poa-gfa
Browse files Browse the repository at this point in the history
[cudapoa] GFA output
  • Loading branch information
Joyjit Daw authored Dec 4, 2020
2 parents c68960e + 0e10f24 commit 16d5272
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 2 deletions.
48 changes: 48 additions & 0 deletions common/base/include/claraparabricks/genomeworks/utils/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,19 @@ class Graph
}
}

/// \brief Write one GFA S (segment) record for every entry in node_labels_.
///
/// Each record has the form "S<TAB>first<TAB>second", where first and second
/// are the two members of the node_labels_ entry, and is terminated with
/// std::endl.
///
/// \param gfa_str Output string stream the S records are appended to
void node_labels_to_gfa(std::ostringstream& gfa_str) const
{
    for (const auto& labelled_node : node_labels_)
    {
        // Record type tag first, then the two tab-separated fields.
        gfa_str << "S";
        gfa_str << "\t" << labelled_node.first;
        gfa_str << "\t" << labelled_node.second << std::endl;
    }
}

/// \brief Serialize edges to dot format
///
/// \param dot_str Output string stream to serialize labels to
Expand All @@ -175,6 +188,27 @@ class Graph
}
}

///
/// \brief Serialize edges to GFA (v1) format
///
/// \param gfa_str An output stringstream to write output to.
void edges_to_gfa(std::ostringstream& gfa_str) const
{
for (auto& iter : edges_)
{
const edge_t& edge = iter.first;
const node_id_t edge_source = edge.first;
const node_id_t edge_sink = edge.second;
gfa_str << "L"
<< "\t" << edge_source << "\t"
<< "+"
<< "\t" << edge_sink << "\t"
<< "+"
<< "\t"
<< "*" << std::endl;
}
}

/// List of adjacent nodes per node ID
std::unordered_map<node_id_t, std::vector<node_id_t>> adjacent_nodes_;

Expand Down Expand Up @@ -214,6 +248,20 @@ class DirectedGraph : public Graph
}
}

/// \brief Serialize the graph to a GFA-formatted string
///
/// The output starts with the header record "H<TAB>VN:Z:1.0", followed by
/// the records written by node_labels_to_gfa() and then those written by
/// edges_to_gfa().
///
/// \return A string holding the header, segment and link records
std::string serialize_to_gfa() const
{
    std::ostringstream gfa_stream;

    // Header record carrying the version tag.
    gfa_stream << "H";
    gfa_stream << "\tVN:Z:1.0" << std::endl;

    // Node records first, then edge records.
    node_labels_to_gfa(gfa_stream);
    edges_to_gfa(gfa_stream);

    return gfa_stream.str();
}

/// \brief Serialize graph structure to dot format
///
/// \return A string encoding the graph in dot format
Expand Down
10 changes: 9 additions & 1 deletion cudapoa/src/application_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
{"graph-length", required_argument, 0, 'l'},
{"pred-distance", required_argument, 0, 'D'},
{"dot", required_argument, 0, 'd'},
{"gfa", required_argument, 0, 'G'},
{"max-groups", required_argument, 0, 'M'},
{"gpu-mem-alloc", required_argument, 0, 'R'},
{"match", required_argument, 0, 'm'},
Expand All @@ -52,7 +53,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
{"help", no_argument, 0, 'h'},
};

std::string optstring = "i:ab:w:s:l:D:d:M:R:m:n:g:vh";
std::string optstring = "i:ab:w:s:l:D:d:G:M:R:m:n:g:vh";

int32_t argument = 0;
while ((argument = getopt_long(argc, argv, optstring.c_str(), options, nullptr)) != -1)
Expand Down Expand Up @@ -91,6 +92,10 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
case 'd':
graph_output_path = std::string(optarg);
break;
case 'G':
graph_output_path = std::string(optarg);
output_gfa = true;
break;
case 'M':
max_groups = std::stoi(optarg);
break;
Expand Down Expand Up @@ -213,6 +218,9 @@ void ApplicationParameters::help(int32_t exit_code)
-d, --dot <file>
output path for printing graph in DOT format [disabled])"
<< R"(
-G, --gfa <file>
output path for printing graph in GFA format [disabled])"
<< R"(
-M, --max-groups <int>
maximum number of POA groups to create from file (-1 for all, > 0 for limited) [-1]
repeats groups if less groups are present than specified)"
Expand Down
1 change: 1 addition & 0 deletions cudapoa/src/application_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ApplicationParameters

std::vector<std::string> input_paths;
std::string graph_output_path;
bool output_gfa = false;
bool all_fasta = true;
bool msa = false; // consensus by default
BandMode band_mode = BandMode::adaptive_band;
Expand Down
9 changes: 8 additions & 1 deletion cudapoa/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,14 @@ int main(int argc, char* argv[])
batch->get_graphs(graph, graph_status);
for (auto& g : graph)
{
graph_output << g.serialize_to_dot() << std::endl;
if (parameters.output_gfa)
{
graph_output << g.serialize_to_gfa() << std::endl;
}
else
{
graph_output << g.serialize_to_dot() << std::endl;
}
}
}

Expand Down

0 comments on commit 16d5272

Please sign in to comment.