diff --git a/include/info.cpp b/include/info.cpp index fa3069e..3a683f1 100644 --- a/include/info.cpp +++ b/include/info.cpp @@ -39,9 +39,12 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ assert(k > 0); assert(k >= m); - const uint64_t N = n + M * (k - 1); // num. symbols in SPSS + const uint64_t N = n + M * (k - 1); // num. characters in SPSS + + /* summing (M-1) provides an upper bound */ + double num_minimizers = + (2.0 * n) / (k - m + 2) + (M - 1); // not distinct, hence num. of super-kmers - // double num_minimizers = (2.0 * n) / (k - m + 2); // not distinct, hence num. of super-kmers // std::cout << "num_minimizers = " << num_minimizers << std::endl; // std::cout << "minimizers: " << (3.0 * num_minimizers) / n << " [bits/kmer]" << std::endl; // std::cout << "pieces: " << (M * (2.0 + std::ceil(std::log2(static_cast(N) / M)))) / n @@ -53,8 +56,11 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ // << std::endl; // std::cout << "strings: " << (2.0 * N) / n << " [bits/kmer]" << std::endl; - double num_bits = 2 * n * (1.0 + (5.0 + std::ceil(std::log2(N))) / (k - m + 2)) + - M * (2 * k + std::ceil(std::log2(static_cast(n) / M + k - 1))); + // double num_bits = 2 * n * (1.0 + (5.0 + std::ceil(std::log2(N))) / (k - m + 2)) + + // M * (2 * k + std::ceil(std::log2(static_cast(n) / M + k - 1))); + + double num_bits = 2 * N + num_minimizers * (5.0 + std::ceil(std::log2(N))) + + M * (2.0 + std::ceil(std::log2(static_cast(N) / M))); return num_bits / n; }