Skip to content

merge api

Teo Lemane edited this page Jul 29, 2021 · 1 revision

The merge API lets you build a partition-specific matrix from counted k-mers/hashes.

First, you need to count k-mers with the kmtricks pipeline.

kmtricks pipeline --file kmtricks.fof --run-dir kmtricks_dir --nb-partitions 4 --kmer-size 20 --count-abundance-min 1 --mode kmer:count:bin --until count

Then, you can merge equivalent partitions (here p0) from multiple samples (here D1, D2, D3, D4).

#include <kmtricks/public.hpp>
// Compile-time parameters passed as template arguments to KmerMerger, Kmer and
// MergeStatistics in main() below.
// NOTE(review): MAX_KMER presumably bounds the supported k-mer size (the
// example uses k = 20) and MAX_COUNT the largest storable count -- confirm
// against the kmtricks headers.
#define MAX_KMER 32
#define MAX_COUNT 255
// Element type of the count vector read from the merger in main(); selected
// by selectC from MAX_COUNT.
using count_type = typename selectC<MAX_COUNT>::type;

// Merges partition 0 of four samples (D1..D4) and prints one matrix row per
// retained k-mer: the k-mer followed by its count in each sample. Afterwards,
// per-sample merge statistics are retrieved from the merger.
int main(int argc, char* argv[])
{
  // One counted k-mer file per sample, all for the same partition.
  std::vector<std::string> paths {
    "kmtricks_dir/counts/partition_0/D1.kmer",
    "kmtricks_dir/counts/partition_0/D2.kmer",
    "kmtricks_dir/counts/partition_0/D3.kmer",
    "kmtricks_dir/counts/partition_0/D4.kmer"
  };

  // One abundance threshold per sample, in the same order as `paths`.
  std::vector<uint32_t> thresholds = {2, 2, 2, 2};
  uint32_t kmer_size = 20;
  uint32_t recurrence_min = 1;
  uint32_t save_if = 1;
  // According to these parameters, a k-mer seen only once in a sample will be
  // kept in this sample if it is solid in 1 (save_if) other sample.

  // The merge parameters are given once, at construction. Note that
  // `KmerMerger<...> merger();` would declare a function (most vexing parse)
  // instead of creating an object.
  KmerMerger<MAX_KMER, MAX_COUNT> merger(paths, thresholds, kmer_size, recurrence_min, save_if);
  while (merger.next())
  {
    // Only print rows that pass the recurrence / rescue filters.
    if (merger.keep())
    {
      const Kmer<MAX_KMER>& kmer = merger.get_current();
      const std::vector<count_type>& counts = merger.counts();
      std::cout << kmer.to_string() << " ";
      for (auto& c : counts)
        std::cout << std::to_string(c) << " ";
      std::cout << "\n";
    }
  }

  // Per-sample statistics; index i refers to the i-th entry of `paths`.
  MergeStatistics<MAX_COUNT>* stats = merger.get_infos();
  const std::vector<uint64_t>& ns = stats->get_non_solid(); // ns[0] -> The number of non-solid k-mers in D1
  const std::vector<uint64_t>& r = stats->get_rescued(); // r[0] -> The number of rescued k-mers in D1
  const std::vector<uint64_t>& uwor = stats->get_unique_wo_rescue(); // uwor[0] -> The number of unique k-mers in D1 without rescue
  const std::vector<uint64_t>& uwr = stats->get_unique_w_rescue(); // uwr[0] -> The number of unique k-mers in D1 with rescue
  const std::vector<uint64_t>& twor = stats->get_total_wo_rescue(); // twor[0] -> The number of k-mers (sum of counts) in D1 without rescue
  const std::vector<uint64_t>& twr = stats->get_total_w_rescue(); // twr[0] -> The number of k-mers (sum of counts) in D1 with rescue
  return 0;
}