-
Notifications
You must be signed in to change notification settings - Fork 8
IOs API
Teo Lemane edited this page Jul 29, 2021
·
1 revision
This page describes how to use the kmtricks API to read kmtricks files.
// Read a k-mer count file one record at a time.
// BUF_SIZE, MAX_K, MAX_C and count_type are not defined in this snippet;
// they must be provided by the surrounding code / kmtricks configuration.
KmerReader<BUF_SIZE> reader("./km_dir/counts/partition_0/D1.kmer");
// The k-mer size is obtained from the information exposed by the reader.
uint32_t kmer_size = reader.infos().kmer_size;
Kmer<MAX_K> kmer;
kmer.set_k(kmer_size);
count_type count;
// Each successful read() fills `kmer` and `count`; the loop stops as soon as
// read() evaluates to false (presumably at end of file -- confirm).
while (reader.read<MAX_K, MAX_C>(kmer, count))
{
// The count is converted with std::to_string before printing.
// NOTE(review): presumably so that a char-sized count_type prints as a number -- confirm.
std::cout << kmer.to_string() << " " << std::to_string(count) << "\n";
}
// Write the reader's content as text to the given output stream.
// NOTE(review): shown right after the loop, but presumably meant as an
// alternative to it -- confirm whether a reader can be reused once consumed.
reader.write_as_text(std::cout);
// Aggregate several k-mer count files (here the same sample D1 in two partitions)
// through a single KmerFileAggregator object.
uint32_t kmer_size = 32;
std::vector<std::string> paths {"./km_dir/counts/partition_0/D1.kmer", "./km_dir/counts/partition_1/D1.kmer"};
KmerFileAggregator<MAX_K, MAX_C> agg(paths, kmer_size);
// Write the aggregated content as text to the given output stream.
agg.write_as_text(std::cout);
// Write the aggregated content to a binary file; the second argument is the
// flag named lz4_compress here.
bool lz4_compress = true;
agg.write_as_bin("./D1.agg.kmer", lz4_compress);
// Merge several k-mer count files through a single KmerFileMerger object.
// MAX_K, MAX_C and count_type are not defined in this snippet; they must be
// provided by the surrounding code / kmtricks configuration.
uint32_t kmer_size = 32;
std::vector<std::string> paths {"./km_dir/counts/partition_0/D1.kmer", "./km_dir/counts/partition_1/D1.kmer"};
KmerFileMerger<MAX_K, MAX_C> merger(paths, kmer_size);

// Iterate over the merged stream: each successful next() makes a new
// (k-mer, count) pair available through current() and count().
while (merger.next())
{
  // Accessors are called on `merger`, the object declared above.
  const Kmer<MAX_K>& kmer = merger.current();
  count_type count = merger.count();
  std::cout << kmer.to_string() << " " << std::to_string(count) << "\n";
}

// Write the merged content as text to the given output stream.
// NOTE(review): the loop above, write_as_text and write_as_bin are presumably
// alternative ways of consuming the merger -- confirm whether it can be reused.
merger.write_as_text(std::cout);

// Write the merged content to a binary file; the second argument is the flag
// named lz4_compress here.
bool lz4_compress = true;
merger.write_as_bin("./D1.agg.kmer", lz4_compress);
// Read a count matrix file one row at a time.
// BUF_SIZE, MAX_K, MAX_C and count_type are not defined in this snippet;
// they must be provided by the surrounding code / kmtricks configuration.
MatrixReader<BUF_SIZE> reader("./km_dir/counts/matrices/matrix_0.count");
// The k-mer size is obtained from the information exposed by the reader.
uint32_t kmer_size = reader.infos().kmer_size;
Kmer<MAX_K> kmer;
kmer.set_k(kmer_size);
// One slot per count column, sized from the reader's nb_counts field.
std::vector<count_type> counts(reader.infos().nb_counts); // count order follows the sample order in the input fof
// Each successful read() fills `kmer` and `counts`; the loop stops as soon as
// read() evaluates to false (presumably at end of file -- confirm).
while (reader.read<MAX_K, MAX_C>(kmer, counts))
{
// Print the k-mer followed by its counts, space separated, one row per line.
std::cout << kmer.to_string() << " ";
for (auto& c : counts)
std::cout << std::to_string(c) << " ";
std::cout << "\n";
}
// Write the reader's content as text to the given output stream.
// NOTE(review): shown right after the loop, but presumably meant as an
// alternative to it -- confirm whether a reader can be reused once consumed.
reader.write_as_text(std::cout);
// Aggregate several count matrix files through a single MatrixFileAggregator object.
// `kmer_size` is not declared in this snippet; it must already hold the k-mer size.
std::vector<std::string> paths {"./km_dir/counts/matrices/matrix_0.count", "./km_dir/counts/matrices/matrix_1.count"};
MatrixFileAggregator<MAX_K, MAX_C> mfa(paths, kmer_size);
// Write the aggregated content as text to the given output stream.
mfa.write_as_text(std::cout);
// Write the aggregated content to a binary file; the second argument is the
// flag named lz4_compress here.
bool lz4_compress = true;
mfa.write_as_bin("./matrix.count", lz4_compress);
// Merge several count matrix files through a single MatrixFileMerger object.
// `kmer_size` is not declared in this snippet; it must already hold the k-mer
// size. MAX_K, MAX_C and count_type come from the surrounding configuration.
std::vector<std::string> paths {"./km_dir/counts/matrices/matrix_0.count", "./km_dir/counts/matrices/matrix_1.count"};
MatrixFileMerger<MAX_K, MAX_C> mfm(paths, kmer_size);

// Iterate over the merged stream: each successful next() makes a new row
// (k-mer plus its vector of counts) available through current() and counts().
while (mfm.next())
{
  // Accessors are called on `mfm`, the object declared above; the vector is
  // named `counts` so that it matches the loop below.
  const Kmer<MAX_K>& kmer = mfm.current();
  const std::vector<count_type>& counts = mfm.counts();
  std::cout << kmer.to_string() << " ";
  for (const auto& c : counts)
    std::cout << std::to_string(c) << " ";
  std::cout << "\n";
}

// Write the merged content as text to the given output stream.
// NOTE(review): the loop above, write_as_text and write_as_bin are presumably
// alternative ways of consuming the merger -- confirm whether it can be reused.
mfm.write_as_text(std::cout);

// Write the merged content to a binary file; the second argument is the flag
// named lz4_compress here.
bool lz4_compress = true;
mfm.write_as_bin("./matrix.sorted.count", lz4_compress);
// Read a presence/absence matrix file one row at a time.
// BUF_SIZE, MAX_K, NBYTES and BITCHECK are not defined in this snippet; they
// must be provided by the surrounding code / kmtricks headers.
PAMatrixReader<BUF_SIZE> reader("./km_dir/counts/matrices/matrix_0.pa");
// The k-mer size is obtained from the information exposed by the reader.
uint32_t kmer_size = reader.infos().kmer_size;
Kmer<MAX_K> kmer;
kmer.set_k(kmer_size);
// Byte buffer sized with NBYTES from the reader's `bits` field.
// NOTE(review): presumably one bit per sample, packed into bytes -- confirm.
std::vector<uint8_t> bits(NBYTES(reader.infos().bits));
int sample_id = 10; // sample at line 10 in the input fof

// Each successful read() fills `kmer` and `bits`; the loop stops as soon as
// read() evaluates to false (presumably at end of file -- confirm).
while (reader.read<MAX_K>(kmer, bits))
{
  std::cout << kmer.to_string() << " ";
  // BITCHECK tests the entry of `bits` associated with sample_id.
  if (BITCHECK(bits, sample_id))
    std::cout << "Found in sample ";
  else
    std::cout << "Not found in sample ";
  std::cout << sample_id << "\n";
}

// Write the reader's content as text to the given output stream.
// NOTE(review): shown right after the loop, but presumably meant as an
// alternative to it -- confirm whether a reader can be reused once consumed.
reader.write_as_text(std::cout);
// Aggregate several presence/absence matrix files through a single
// PAMatrixFileAggregator object.
// `kmer_size` is not declared in this snippet; it must already hold the k-mer
// size (for instance read from a reader's infos()).
std::vector<std::string> paths {"./km_dir/counts/matrices/matrix_0.pa", "./km_dir/counts/matrices/matrix_1.pa"};
// The second constructor argument is the k-mer size, as for the other
// aggregators and mergers on this page.
// NOTE(review): PAMatrixFileMerger on this page takes only <MAX_K>; check
// whether <MAX_K, MAX_C> is really the right parameter list here.
PAMatrixFileAggregator<MAX_K, MAX_C> pmfa(paths, kmer_size);

// Write the aggregated content as text to the given output stream.
pmfa.write_as_text(std::cout);

// Write the aggregated content to a binary file; the second argument is the
// flag named lz4_compress here.
bool lz4_compress = true;
pmfa.write_as_bin("./matrix.pa", lz4_compress);
C. Merge presence/absence matrix files (yields a single sorted presence/absence matrix file)
// Merge several presence/absence matrix files through a single
// PAMatrixFileMerger object.
// `kmer_size` is not declared in this snippet; it must already hold the k-mer
// size. MAX_K and BITCHECK come from the surrounding code / kmtricks headers.
std::vector<std::string> paths {"./km_dir/counts/matrices/matrix_0.pa", "./km_dir/counts/matrices/matrix_1.pa"};
PAMatrixFileMerger<MAX_K> pmfm(paths, kmer_size);
int sample_id = 10; // sample at line 10 in the input fof

// Iterate over the merged stream: each successful next() makes a new row
// (k-mer plus its bit vector) available through current() and bits().
while (pmfm.next())
{
  // Accessors are called on `pmfm`, the object declared above.
  const Kmer<MAX_K>& kmer = pmfm.current();
  const std::vector<uint8_t>& bits = pmfm.bits();
  std::cout << kmer.to_string() << " ";
  // BITCHECK tests the entry of `bits` associated with sample_id.
  if (BITCHECK(bits, sample_id))
    std::cout << "Found in sample ";
  else
    std::cout << "Not found in sample ";
  std::cout << sample_id << "\n";
}

// Write the merged content as text to the given output stream.
// NOTE(review): the loop above, write_as_text and write_as_bin are presumably
// alternative ways of consuming the merger -- confirm whether it can be reused.
pmfm.write_as_text(std::cout);

// Write the merged content to a binary file; the second argument is the flag
// named lz4_compress here.
bool lz4_compress = true;
pmfm.write_as_bin("./matrix.sorted.pa", lz4_compress);
The API for hash files is the same but uses `uint64_t` instead of `Kmer<MAX_K>`. Mergers are not provided for hashes because hash spaces are specific and consecutive according to partitions. To obtain truly sorted outputs, partitions just need to be aggregated in the right order.
// Signature overview of the hash-file classes (not a runnable snippet: the
// parenthesised parts are parameter declarations, not arguments).
// Readers take a single file path; aggregators take a list of file paths.

// Hash count files.
HashReader<BUF_SIZE> reader(const std::string& path);
HashFileAggregator<MAX_C> hfa(const std::vector<std::string>& paths);
// Hash count matrices.
HashMatrixReader<BUF_SIZE> reader(const std::string& path);
HashMatrixFileAggregator<MAX_C> hmfa(const std::vector<std::string>& paths);
// Hash presence/absence matrices.
PAHashMatrixReader<BUF_SIZE> reader(const std::string& path);
PAHashMatrixFileAggregator<MAX_C> phmfa(const std::vector<std::string>& paths);