diff --git a/common/base/include/claraparabricks/genomeworks/utils/graph.hpp b/common/base/include/claraparabricks/genomeworks/utils/graph.hpp
index ef9a6da86..9b58f7117 100644
--- a/common/base/include/claraparabricks/genomeworks/utils/graph.hpp
+++ b/common/base/include/claraparabricks/genomeworks/utils/graph.hpp
@@ -160,6 +160,19 @@ class Graph
         }
     }
 
+    ///
+    /// \brief Generates a GFA S(sequence) line from a node's label and sequence.
+    ///
+    /// \param gfa_str An output stringstream to write S lines to.
+    void node_labels_to_gfa(std::ostringstream& gfa_str) const
+    {
+        for (auto& iter : node_labels_)
+        {
+            gfa_str << "S"
+                    << "\t" << iter.first << "\t" << iter.second << std::endl;
+        }
+    }
+
     /// \brief Serialize edges to dot format
     ///
     /// \param dot_str Output string stream to serialize labels to
@@ -175,6 +188,27 @@ class Graph
         }
     }
 
+    ///
+    /// \brief Serialize edges to GFA (v1) format
+    ///
+    /// \param gfa_str An output stringstream to write output to.
+    void edges_to_gfa(std::ostringstream& gfa_str) const
+    {
+        for (auto& iter : edges_)
+        {
+            const edge_t& edge = iter.first;
+            const node_id_t edge_source = edge.first;
+            const node_id_t edge_sink = edge.second;
+            gfa_str << "L"
+                    << "\t" << edge_source << "\t"
+                    << "+"
+                    << "\t" << edge_sink << "\t"
+                    << "+"
+                    << "\t"
+                    << "*" << std::endl;
+        }
+    }
+
     /// List of adjacent nodes per node ID
     std::unordered_map<node_id_t, std::vector<node_id_t>> adjacent_nodes_;
 
@@ -214,6 +248,20 @@ class DirectedGraph : public Graph
         }
     }
 
+    /// \brief Serialize graph structure to GFA format
+    ///
+    /// \return A string encoding the graph in GFA format
+    std::string serialize_to_gfa() const
+    {
+        std::ostringstream gfa_str;
+        gfa_str << "H"
+                << "\t"
+                << "VN:Z:1.0" << std::endl;
+        node_labels_to_gfa(gfa_str);
+        edges_to_gfa(gfa_str);
+        return gfa_str.str();
+    }
+
     /// \brief Serialize graph structure to dot format
     ///
     /// \return A string encoding the graph in dot format
diff --git a/cudapoa/src/application_parameters.cpp b/cudapoa/src/application_parameters.cpp
index a1216a711..797c6f71d 100644
--- a/cudapoa/src/application_parameters.cpp
+++ b/cudapoa/src/application_parameters.cpp
@@ -43,6 +43,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
         {"graph-length", required_argument, 0, 'l'},
         {"pred-distance", required_argument, 0, 'D'},
         {"dot", required_argument, 0, 'd'},
+        {"gfa", required_argument, 0, 'G'},
         {"max-groups", required_argument, 0, 'M'},
         {"gpu-mem-alloc", required_argument, 0, 'R'},
         {"match", required_argument, 0, 'm'},
@@ -52,7 +53,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
         {"help", no_argument, 0, 'h'},
     };
 
-    std::string optstring = "i:ab:w:s:l:D:d:M:R:m:n:g:vh";
+    std::string optstring = "i:ab:w:s:l:D:d:G:M:R:m:n:g:vh";
 
     int32_t argument = 0;
     while ((argument = getopt_long(argc, argv, optstring.c_str(), options, nullptr)) != -1)
@@ -91,6 +92,13 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
         case 'd':
             graph_output_path = std::string(optarg);
+            // -d and -G share graph_output_path, so the last one given must also
+            // decide the format; without this reset "-G a -d b" writes GFA into b.
+            output_gfa = false;
             break;
+        case 'G':
+            graph_output_path = std::string(optarg);
+            output_gfa = true;
+            break;
         case 'M':
             max_groups = std::stoi(optarg);
             break;
@@ -213,6 +221,9 @@ void ApplicationParameters::help(int32_t exit_code)
         -d, --dot
             output path for printing graph in DOT format [disabled])"
               << R"(
+        -G, --gfa
+            output path for printing graph in GFA format [disabled])"
+              << R"(
         -M, --max-groups
             maximum number of POA groups to create from file (-1 for all, > 0 for limited) [-1]
             repeats groups if less groups are present than specified)"
diff --git a/cudapoa/src/application_parameters.hpp b/cudapoa/src/application_parameters.hpp
index d2e38856f..2cf92706f 100644
--- a/cudapoa/src/application_parameters.hpp
+++ b/cudapoa/src/application_parameters.hpp
@@ -39,6 +39,7 @@ class ApplicationParameters
 
     std::vector<std::string> input_paths;
     std::string graph_output_path;
+    bool output_gfa = false;
     bool all_fasta = true;
     bool msa = false; // consensus by default
     BandMode band_mode = BandMode::adaptive_band;
diff --git a/cudapoa/src/main.cpp b/cudapoa/src/main.cpp
index 2765492e5..181ea5f1c 100644
--- a/cudapoa/src/main.cpp
+++ b/cudapoa/src/main.cpp
@@ -254,7 +254,14 @@ int main(int argc, char* argv[])
                 batch->get_graphs(graph, graph_status);
                 for (auto& g : graph)
                 {
-                    graph_output << g.serialize_to_dot() << std::endl;
+                    if (parameters.output_gfa)
+                    {
+                        graph_output << g.serialize_to_gfa() << std::endl;
+                    }
+                    else
+                    {
+                        graph_output << g.serialize_to_dot() << std::endl;
+                    }
                 }
             }
 