Skip to content

Commit

Permalink
[cudapoa] Add optional graph output in GFA format.
Browse files Browse the repository at this point in the history
Adds methods for outputting the graph structure generated
during partial order alignment in GFA format and corresponding
optional command line arguments in main.cpp
  • Loading branch information
edawson committed Sep 1, 2020
1 parent bbabea4 commit f8126ef
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 2 deletions.
45 changes: 45 additions & 0 deletions common/base/include/claraparabricks/genomeworks/utils/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,19 @@ class Graph
}
}

/// \brief Serialize node labels to GFA (v1) S (segment) lines
///
/// Appends one line per entry of node_labels_, in the form
/// "S<TAB>first<TAB>second", where first/second are the two members of the entry
/// (presumably the node ID and its label -- confirm against node_labels_).
///
/// \param gfa_str Output string stream to append the S lines to
void node_labels_to_gfa(std::ostringstream& gfa_str) const
{
    for (const auto& iter : node_labels_)
    {
        // '\n' instead of std::endl: the produced text is the same, but the
        // stream is not flushed once per line.
        gfa_str << "S\t" << iter.first << '\t' << iter.second << '\n';
    }
}

/// \brief Serialize edges to dot format
///
/// \param dot_str Output string stream to serialize labels to
Expand All @@ -175,6 +188,27 @@ class Graph
}
}

/// \brief Serialize edges to GFA (v1) L (link) lines
///
/// Appends one line per entry of edges_, in the form
/// "L<TAB>source<TAB>+<TAB>sink<TAB>+<TAB>*". Both ends of every link are written
/// with '+' orientation and the overlap field is always "*".
///
/// \param gfa_str Output string stream to append the L lines to
void edges_to_gfa(std::ostringstream& gfa_str) const
{
    for (const auto& iter : edges_)
    {
        const edge_t& edge          = iter.first;
        const node_id_t edge_source = edge.first;
        const node_id_t edge_sink   = edge.second;
        // Constant fields are merged into single literals; '\n' instead of
        // std::endl avoids a flush per line while emitting the same text.
        gfa_str << "L\t" << edge_source << "\t+\t" << edge_sink << "\t+\t*\n";
    }
}

/// List of adjacent nodes per node ID
std::unordered_map<node_id_t, std::vector<node_id_t>> adjacent_nodes_;

Expand Down Expand Up @@ -214,6 +248,17 @@ class DirectedGraph : public Graph
}
}

/// \brief Serialize graph structure to GFA (v1) format
///
/// The output starts with the header line "H<TAB>VN:Z:1.0", followed by the lines
/// written by node_labels_to_gfa() and then those written by edges_to_gfa().
///
/// \return A string encoding the graph in GFA format
std::string serialize_to_gfa() const
{
    std::ostringstream gfa_str;
    // Header line; '\n' instead of std::endl since flushing an in-memory
    // stream serves no purpose.
    gfa_str << "H\tVN:Z:1.0\n";
    node_labels_to_gfa(gfa_str);
    edges_to_gfa(gfa_str);
    return gfa_str.str();
}

/// \brief Serialize graph structure to dot format
///
/// \return A string encoding the graph in dot format
Expand Down
10 changes: 9 additions & 1 deletion cudapoa/src/application_parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
{"band-mode", required_argument, 0, 'b'},
{"band-width", required_argument, 0, 'w'},
{"dot", required_argument, 0, 'd'},
{"gfa", required_argument, 0, 'G'},
{"max-groups", required_argument, 0, 'M'},
{"gpu-mem-alloc", required_argument, 0, 'R'},
{"match", required_argument, 0, 'm'},
Expand All @@ -49,7 +50,7 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
{"help", no_argument, 0, 'h'},
};

std::string optstring = "i:ab:w:d:M:R:m:n:g:vh";
std::string optstring = "i:ab:w:d:G:M:R:m:n:g:vh";

int32_t argument = 0;
while ((argument = getopt_long(argc, argv, optstring.c_str(), options, nullptr)) != -1)
Expand All @@ -75,6 +76,10 @@ ApplicationParameters::ApplicationParameters(int argc, char* argv[])
case 'd':
graph_output_path = std::string(optarg);
break;
case 'G':
graph_output_path = std::string(optarg);
output_gfa = true;
break;
case 'M':
max_groups = std::stoi(optarg);
break;
Expand Down Expand Up @@ -188,6 +193,9 @@ void ApplicationParameters::help(int32_t exit_code)
-d, --dot <file>
output path for printing graph in DOT format [disabled])"
<< R"(
-G, --gfa <file>
output path for printing graph in GFA format [disabled])"
<< R"(
-M, --max-groups <int>
maximum number of POA groups to create from file (-1 for all, > 0 for limited) [-1]
repeats groups if less groups are present than specified)"
Expand Down
1 change: 1 addition & 0 deletions cudapoa/src/application_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ApplicationParameters

std::vector<std::string> input_paths;
std::string graph_output_path;
bool output_gfa = false;
bool all_fasta = true;
bool msa = false; // consensus by default
BandMode band_mode = BandMode::adaptive_band;
Expand Down
9 changes: 8 additions & 1 deletion cudapoa/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,14 @@ int main(int argc, char* argv[])
batch->get_graphs(graph, graph_status);
for (auto& g : graph)
{
graph_output << g.serialize_to_dot() << std::endl;
if (parameters.output_gfa)
{
graph_output << g.serialize_to_gfa() << std::endl;
}
else
{
graph_output << g.serialize_to_dot() << std::endl;
}
}
}

Expand Down

0 comments on commit f8126ef

Please sign in to comment.