Skip to content

Commit

Permalink
pass guide tree and command string to msa2lddt visualisation
Browse files Browse the repository at this point in the history
  • Loading branch information
gamcil committed Jan 10, 2024
1 parent 015f426 commit 32eb03b
Show file tree
Hide file tree
Showing 8 changed files with 2,832 additions and 1,655 deletions.
6 changes: 5 additions & 1 deletion data/easymsa.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ else
fi
fi

"$MMSEQS" msa2lddt "${STRUCTUREDB}" "${RESULTS}" "--lddt-html" "${RESULTS%%.*}.html" \
# shellcheck disable=SC2086
"$MMSEQS" msa2lddt "${STRUCTUREDB}" "${RESULTS}" \
"--lddt-html" "${RESULTS}.html" \
"--guide-tree" "${RESULTS}.nw" \
${MSA2LDDT_PAR} \
|| fail "msa2lddt died"

if [ -n "${REMOVE_TMP}" ]; then
Expand Down
4,378 changes: 2,744 additions & 1,634 deletions data/main_foldmason.js

Large diffs are not rendered by default.

Binary file modified data/vendor_foldmason.js.zst
Binary file not shown.
13 changes: 9 additions & 4 deletions src/commons/FoldmasonParameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ FoldmasonParameters::FoldmasonParameters() :
PARAM_BITFACTOR_3DI(PARAM_BITFACTOR_3DI_ID, "--bitfactor-3di", "3Di matrix bit factor", "3Di matrix bit factor", typeid(float), (void *) &bitFactor3Di, "^([0-9]*\\.[0-9]*)$"),
PARAM_OUTPUT_MODE(PARAM_OUTPUT_MODE_ID, "--output-mode", "Alignment output mode", "Output file mode: \n0: Amino acid\n1: 3Di alphabet", typeid(int), (void *) &outputmode, "[0-1]{1}$"),
PARAM_LDDT_HTML(PARAM_LDDT_HTML_ID, "--lddt-html", "LDDT HTML file", "File to write LDDT MSA HTML visualisation to", typeid(std::string), (void *) &lddtHtml, ""),
PARAM_PAIR_THRESHOLD(PARAM_PAIR_THRESHOLD_ID,"--pair-threshold", "LDDT pair threshold", "% of pair subalignments with LDDT information [0.0,1.0]",typeid(float), (void *) &pairThreshold, "^0(\\.[0-9]+)?|1(\\.0+)?$")
PARAM_PAIR_THRESHOLD(PARAM_PAIR_THRESHOLD_ID, "--pair-threshold", "LDDT pair threshold", "% of pair subalignments with LDDT information [0.0,1.0]",typeid(float), (void *) &pairThreshold, "^0(\\.[0-9]+)?|1(\\.0+)?$"),
PARAM_REPORT_COMMAND(PARAM_REPORT_COMMAND_ID, "--report-command", "", "", typeid(std::string), (void *) &reportCommand, "")
{
// structuremsa
structuremsa.push_back(&PARAM_WG);
Expand Down Expand Up @@ -53,16 +54,19 @@ FoldmasonParameters::FoldmasonParameters() :
structuremsacluster = combineList(structuremsacluster, structuremsa);
// msa2lddt
msa2lddt.push_back(&PARAM_HELP);
msa2lddt.push_back(&PARAM_LDDT_HTML);
msa2lddt.push_back(&PARAM_THREADS);
msa2lddt.push_back(&PARAM_PAIR_THRESHOLD);
msa2lddt.push_back(&PARAM_THREADS);
msa2lddt.push_back(&PARAM_GUIDE_TREE);
msa2lddt.push_back(&PARAM_V);
msa2lddt.push_back(&PARAM_REPORT_COMMAND);
// refinemsa
refinemsa = combineList(refinemsa, structuremsa);
easymsaworkflow = combineList(easymsaworkflow, structurecreatedb);
easymsaworkflow = combineList(easymsaworkflow, structuremsa);
easymsaworkflow = combineList(easymsaworkflow, msa2lddt);
easymsaworkflow.push_back(&PARAM_PRECLUSTER);
pcaAa = 1.1;
Expand All @@ -73,6 +77,7 @@ FoldmasonParameters::FoldmasonParameters() :
scoreBias3di = 0.6;
matchRatio = 0.51;
guideTree = "";
reportCommand = "";
recomputeScores = false;
regressive = false;
precluster = false;
Expand Down
3 changes: 2 additions & 1 deletion src/commons/FoldmasonParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class FoldmasonParameters : public LocalParameters {
PARAMETER(PARAM_OUTPUT_MODE)
PARAMETER(PARAM_LDDT_HTML)
PARAMETER(PARAM_PAIR_THRESHOLD)
// PARAMETER(PARAM_NEWICK_OUTPUT)
PARAMETER(PARAM_REPORT_COMMAND)

MultiParam<PseudoCounts> pcaAa;
MultiParam<PseudoCounts> pcbAa;
Expand All @@ -67,6 +67,7 @@ class FoldmasonParameters : public LocalParameters {
float bitFactor3Di;
int outputmode;
std::string lddtHtml;
std::string reportCommand;
float pairThreshold;
};
#endif
30 changes: 28 additions & 2 deletions src/strucclustutils/msa2lddt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "Parameters.h"
#include "StructureUtil.h"
#include "Util.h"
#include <fstream>
#include <cassert>

#define ZSTD_STATIC_LINKING_ONLY
Expand Down Expand Up @@ -470,6 +471,7 @@ R"html(<!DOCTYPE html>

free(dst);

// tree: string (optional, from --guide-tree)
// entries: [ { name, aa, ss, ca }, ... ]
// scores: [ float ]
// statistics: { db, msaFile, msaLDDT, cmdString (optional, from --report-command) }
Expand Down Expand Up @@ -504,13 +506,33 @@ R"html(<!DOCTYPE html>
entry.append(",");
resultWriter.writeData(entry.c_str(), entry.length(), 0, 0, false, false);
}
std::string end = "],\"statistics\": {";
end.append("\"db\":\"");
std::string end = "],";

if (par.guideTree != "") {
std::string tree;
std::string line;
std::ifstream newick(par.guideTree);
if (newick.is_open()) {
while (std::getline(newick, line))
tree += line;
newick.close();
}
end.append("\"tree\": \"");
end.append(tree);
end.append("\",");
}
end.append("\"statistics\": {\"db\":\"");
end.append(par.db1);
end.append("\",\"msaFile\":\"");
end.append(par.db2);
end.append("\",\"msaLDDT\":");
end.append(std::to_string(lddtScore));

if (par.reportCommand != "") {
end.append(",\"cmdString\":\"");
end.append(par.reportCommand);
end.append("\"");
}
end.append("}}</script>");

resultWriter.writeData(end.c_str(), end.length(), 0, 0, false, false);
Expand All @@ -519,6 +541,10 @@ R"html(<!DOCTYPE html>
FileUtil::remove(lddtHtmlIdx.c_str());
}

if (par.reportCommand != "") {
std::cout << "Report command: " << par.reportCommand << '\n';
}

seqDbrAA.close();
seqDbrCA.close();
seqDbr3Di.close();
Expand Down
38 changes: 27 additions & 11 deletions src/strucclustutils/structuremsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1451,21 +1451,27 @@ int structuremsa(int argc, const char **argv, const Command& command, bool preCl
} else {
memset(alreadyMerged, 0, sizeof(bool) * sequenceCnt);
}


// If a guide tree file was given (--guide-tree), read its Newick string from that file.
// A non-empty tree is parsed with parseNewick; otherwise a guide tree is computed below.
std::string tree;
std::vector<AlnSimple> hits;

if (par.guideTree != "") {
std::cout << "Loading guide tree: " << par.guideTree << "\n";
std::string tree;
std::string line;
std::ifstream newick(par.guideTree);
if (newick.is_open()) {
Debug(Debug::INFO) << "Writing guide tree to: " << par.guideTree << '\n';
while (std::getline(newick, line))
tree += line;
newick.close();
}
}
if (tree != "") {
hits = parseNewick(tree, headers_rev);
if (par.regressive)
if (par.regressive) {
std::reverse(hits.begin(), hits.end());
}
} else {
hits = updateAllScores(
tinySubMatAA,
Expand Down Expand Up @@ -1505,7 +1511,6 @@ int structuremsa(int argc, const char **argv, const Command& command, bool preCl
hits = mst(hits, sequenceCnt);
std::cout << "Generated guide tree\n";
}


std::cout << "Optimising merge order\n";
std::vector<size_t> merges;
Expand All @@ -1521,9 +1526,13 @@ int structuremsa(int argc, const char **argv, const Command& command, bool preCl
}

std::string nw = orderToTree(hits, headers, sequenceCnt);
std::cout << "Tree: " << nw << ";\n";
std::string treeFile = par.filenames[par.filenames.size()-1] + ".nw";
Debug(Debug::INFO) << "Writing guide tree to: " << treeFile << '\n';
std::ofstream guideTree(treeFile, std::ofstream::out);
guideTree << nw;
guideTree.close();

std::cout << "Merging:\n";
Debug(Debug::INFO) << "Merging:\n";

size_t finalMSAId = 0;

Expand Down Expand Up @@ -1693,9 +1702,9 @@ int structuremsa(int argc, const char **argv, const Command& command, bool preCl
groups[targetId].clear();
mappings[targetId].clear();

testSeqLens(groups[mergedId], cigars_aa, seqLens);
// testSeqLens(groups[mergedId], cigars_aa, seqLens);

if (true) {
/* if (false) {
// calculate LDDT of merged alignment
float lddtScore = std::get<2>(calculate_lddt(cigars_aa, groups[mergedId], dbKeys, seqLens, &seqDbrCA, par.pairThreshold));
std::cout << std::fixed << std::setprecision(4)
Expand All @@ -1705,7 +1714,14 @@ if (true) {
std::cout << "\t(TM-align)";
}
std::cout << '\n';
}
} */
std::cout << std::fixed << std::setprecision(4)
<< queryIsProfile << "\t" << targetIsProfile << '\t' << headers[mergedId] << "\t" << headers[targetId]
<< '\t' << res.score;
if (tmaligned){
std::cout << "\t(TM-align)";
}
std::cout << '\n';

mappings[mergedId] = computeProfileMask(
groups[mergedId],
Expand Down Expand Up @@ -1802,7 +1818,7 @@ if (true) {

// Write final MSA to file with correct headers
DBWriter resultWriter(
par.filenames[par.filenames.size()-1].c_str(),
(par.filenames[par.filenames.size()-1] + ".fa").c_str(),
(par.filenames[par.filenames.size()-1] + ".index").c_str(),
static_cast<unsigned int>(par.threads), par.compressed, Parameters::DBTYPE_OMIT_FILE
);
Expand Down
19 changes: 17 additions & 2 deletions src/workflow/EasyMSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,26 @@ int easymsa(int argc, const char **argv, const Command &command) {
cmd.addVariable("REMOVE_TMP", par.removeTmpFiles ? "TRUE" : NULL);

cmd.addVariable("RUNNER", par.runner.c_str());
cmd.addVariable("THREADS_PAR", par.createParameterString(par.onlythreads).c_str());
cmd.addVariable("VERBOSITY", par.createParameterString(par.onlyverbosity).c_str());
cmd.addVariable("PRECLUSTER", par.precluster ? "TRUE" : NULL);

cmd.addVariable("PRECLUSTER", par.precluster ? "TRUE" : NULL);
cmd.addVariable("CREATEDB_PAR", par.createParameterString(par.structurecreatedb).c_str());

// needs to be last
std::vector<MMseqsParameter *> msa2lddtWithoutHtml;
par.PARAM_GAP_OPEN.wasSet = true;
par.PARAM_GAP_EXTEND.wasSet = true;
par.PARAM_MATCH_RATIO.wasSet = true;
par.PARAM_FILTER_MSA.wasSet = true;
par.reportCommand = par.createParameterString(par.easymsaworkflow, true);
for (size_t i = 0; i < par.msa2lddt.size(); i++) {
if (par.msa2lddt[i]->uniqid != par.PARAM_LDDT_HTML.uniqid &&
par.msa2lddt[i]->uniqid != par.PARAM_GUIDE_TREE.uniqid) {
msa2lddtWithoutHtml.push_back(par.msa2lddt[i]);
}
}
cmd.addVariable("MSA2LDDT_PAR", par.createParameterString(msa2lddtWithoutHtml).c_str());

std::string program = tmpDir + "/easymsa.sh";
FileUtil::writeFile(program, easymsa_sh, easymsa_sh_len);
cmd.execProgram(program.c_str(), par.filenames);
Expand Down

0 comments on commit 32eb03b

Please sign in to comment.