Skip to content

Commit

Permalink
Merge pull request #250 from paulsengroup/fix/cooler-var-bin
Browse files Browse the repository at this point in the history
Fix division by zero when fetching pixels from coolers with bin tables with variable bin size
  • Loading branch information
robomics committed Sep 13, 2024
2 parents 784477d + 77613c3 commit f90cadb
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 8 deletions.
16 changes: 13 additions & 3 deletions src/libhictk/cooler/include/hictk/cooler/impl/index_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//
// SPDX-License-Identifier: MIT

#pragma once

#include <fmt/format.h>

#include <algorithm>
Expand All @@ -13,10 +15,12 @@
#include <stdexcept>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>

#include "hictk/bin_table.hpp"
#include "hictk/chromosome.hpp"
#include "hictk/common.hpp"
#include "hictk/fmt/chromosome.hpp"

namespace hictk::cooler {
Expand Down Expand Up @@ -98,12 +102,18 @@ inline std::uint64_t Index::get_offset_by_bin_id(std::uint64_t bin_id) const {
}

inline std::uint64_t Index::get_offset_by_pos(const Chromosome &chrom, std::uint32_t pos) const {
const auto row_idx = pos / resolution();
return get_offset_by_row_idx(chrom.id(), row_idx);
return get_offset_by_pos(chrom.id(), pos);
}

inline std::uint64_t Index::get_offset_by_pos(std::uint32_t chrom_id, std::uint32_t pos) const {
const auto row_idx = pos / resolution();
if (HICTK_LIKELY(std::holds_alternative<BinTableFixed>(_bins->get()))) {
assert(resolution() != 0);
const auto row_idx = pos / resolution();
return get_offset_by_row_idx(chrom_id, row_idx);
}

assert(resolution() == 0);
const auto row_idx = bins().at(chrom_id, pos).rel_id();
return get_offset_by_row_idx(chrom_id, row_idx);
}

Expand Down
112 changes: 107 additions & 5 deletions test/units/cooler/pixel_selector_1d_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,36 +23,40 @@
namespace hictk::cooler::test::pixel_selector {

template <typename N>
static std::ptrdiff_t generate_test_data(const std::filesystem::path& path, const Reference& chroms,
std::uint32_t bin_size) {
static std::pair<std::ptrdiff_t, N> generate_test_data(const std::filesystem::path& path,
const Reference& chroms,
std::uint32_t bin_size) {
auto f = File::create<N>(path.string(), chroms, bin_size, true);

const auto num_bins = f.bins().size();

std::vector<ThinPixel<N>> pixels;

N n = 1;
N sum = 0;
for (std::uint64_t bin1_id = 0; bin1_id < num_bins; ++bin1_id) {
for (std::uint64_t bin2_id = bin1_id; bin2_id < num_bins; ++bin2_id) {
sum += n;
pixels.emplace_back(ThinPixel<N>{bin1_id, bin2_id, n++});
}
}
f.append_pixels(pixels.begin(), pixels.end());
return static_cast<std::ptrdiff_t>(pixels.size());
return std::make_pair(static_cast<std::ptrdiff_t>(pixels.size()), sum);
}

// NOLINTNEXTLINE(readability-function-cognitive-complexity)
TEST_CASE("Cooler: pixel selector 1D queries", "[pixel_selector][short]") {
TEST_CASE("Cooler (fixed bin size): pixel selector 1D queries", "[pixel_selector][short]") {
const auto path1 = testdir() / "pixel_selector_devel.cool";

const Reference chroms{Chromosome{0, "chr1", 1000}, Chromosome{1, "chr2", 100}};
constexpr std::uint32_t bin_size = 10;
using T = std::uint32_t;

const auto expected_nnz = generate_test_data<T>(path1, chroms, bin_size);
const auto [expected_nnz, expected_sum] = generate_test_data<T>(path1, chroms, bin_size);

const File f(path1.string());
REQUIRE(std::distance(f.begin<T>(), f.end<T>()) == expected_nnz);
REQUIRE(std::holds_alternative<BinTableFixed>(f.bins().get()));

SECTION("query overlaps chrom start") {
auto selector = f.fetch("chr1:0-20");
Expand Down Expand Up @@ -165,6 +169,15 @@ TEST_CASE("Cooler: pixel selector 1D queries", "[pixel_selector][short]") {
CHECK(sum == 334'290);
}

SECTION("query spans entire genome") {
auto selector = f.fetch();
CHECK(std::distance(selector.begin<T>(), selector.end<T>()) == expected_nnz);
const auto sum = std::accumulate(
selector.begin<T>(), selector.end<T>(), T(0),
[&](T accumulator, const ThinPixel<T>& pixel) { return accumulator + pixel.count; });
CHECK(sum == expected_sum);
}

SECTION("equality operator") {
CHECK(f.fetch("chr1:0-1000") == f.fetch("chr1:0-1000"));
CHECK(f.fetch("chr1:10-1000") != f.fetch("chr1:0-1000"));
Expand Down Expand Up @@ -216,4 +229,93 @@ TEST_CASE("Cooler: pixel selector 1D queries", "[pixel_selector][short]") {
}
}

// NOLINTNEXTLINE(readability-function-cognitive-complexity)
TEST_CASE("Cooler (variable bin size): pixel selector 1D queries", "[pixel_selector][short]") {
const auto path1 = datadir / "cooler_variable_bins_test_file.cool";
using T = std::uint32_t;

const File f(path1.string());
REQUIRE(std::holds_alternative<BinTableVariable<>>(f.bins().get()));

SECTION("query overlaps chrom start") {
auto selector = f.fetch("chr1:0-20");
const auto pixels = selector.read_all<T>();
REQUIRE(pixels.size() == 1);
const auto& p = pixels.front();

CHECK(p.coords.bin1.id() == 0);
CHECK(p.coords.bin2.id() == 2);
CHECK(p.count == 7);
}

SECTION("query overlaps chrom end") {
auto selector = f.fetch("chr1:20-32");
const auto pixels = selector.read_all<T>();
REQUIRE(pixels.size() == 1);
const auto& p = pixels.front();

CHECK(p.coords.bin1.id() == 2);
CHECK(p.coords.bin2.id() == 3);
CHECK(p.count == 1);
}

SECTION("query does not overlap chrom boundaries") {
auto selector = f.fetch("chr1:15-23");
const auto pixels = selector.read_all<T>();
REQUIRE(pixels.empty());
}

SECTION("query does not line up with bins") {
auto selector = f.fetch("chr1:17-27");
const auto pixels = selector.read_all<T>();
REQUIRE(pixels.size() == 1);

const auto& p = pixels.front();

CHECK(p.coords.bin1.id() == 2);
CHECK(p.coords.bin2.id() == 3);
CHECK(p.count == 1);
}

SECTION("query spans 1 bin") {
auto selector = f.fetch("chr1:0-8");
CHECK(selector.empty());
}

SECTION("query spans 1bp") {
auto selector = f.fetch("chr1:0-1");
CHECK(selector.empty());
}

SECTION("query spans entire chromosome") {
auto selector = f.fetch("chr1");
auto pixels = selector.read_all<T>();

REQUIRE(pixels.size() == 4);
CHECK(pixels[0].count == 7);
CHECK(pixels[1].count == 1);
CHECK(pixels[2].count == 7);
CHECK(pixels[3].count == 1);

selector = f.fetch("chr2");
pixels = selector.read_all<T>();

REQUIRE(pixels.size() == 3);
CHECK(pixels[0].count == 5);
CHECK(pixels[1].count == 5);
CHECK(pixels[2].count == 6);
}

SECTION("query spans entire genome") {
constexpr std::ptrdiff_t expected_nnz = 19;
constexpr T expected_sum = 96;
auto selector = f.fetch();
CHECK(std::distance(selector.begin<T>(), selector.end<T>()) == expected_nnz);
const auto sum = std::accumulate(
selector.begin<T>(), selector.end<T>(), T(0),
[&](T accumulator, const ThinPixel<T>& pixel) { return accumulator + pixel.count; });
CHECK(sum == expected_sum);
}
}

} // namespace hictk::cooler::test::pixel_selector

0 comments on commit f90cadb

Please sign in to comment.