CacheReader: Implement cache file reading one Cube at a time
- Remove the CacheReader XYZ mapping.
- Add CubeReadIterator that reads Cubes one at a time
  (see the sketch after this list).
- FileShapeRange now takes the cache file and offsets into the file.
- Update CacheReader::loadFile() to initialize the array of
  FileShapeRange from the cache file.
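
For illustration, a minimal caller-side sketch of the streaming read path
(not part of this diff): `dumpShapeCount` is a made-up helper, and it assumes
CacheIterator forwards `operator*`, `operator++` and `operator!=` to the
underlying CubeReadIterator shown in the diff below.

```cpp
#include <cstdint>
#include <cstdio>

#include "newCache.hpp"  // assumed include path for CacheReader / IShapeRange

// Hypothetical helper: stream one shape's cubes straight from the cache file,
// one Cube per dereference, so the whole shape never sits in memory at once.
void dumpShapeCount(CacheReader& reader, uint32_t shapeIndex) {
    IShapeRange& range = reader.getCubesByShape(shapeIndex);
    std::printf("  num: %zu\n", range.size());
    for (auto it = range.begin(); it != range.end(); ++it) {
        Cube cube = *it;  // CubeReadIterator::read() pulls n XYZs from the file here
        (void)cube;       // ... process one cube at a time ...
    }
}
```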

The result is worth celebrating: N=14 computed for the first time
with less than 9 GiB of RSS:

```
process output shape  99/101 [ 3  5  5]
  shape 2 5 5
  shape 3 4 5
  num: 588828
saved ./cache/cubes_14_3-5-5.bin, took 0.01 s
process output shape 100/101 [ 4  4  4]
  shape 3 4 4
  shape 4 4 4
  num: 3341560
saved ./cache/cubes_14_4-4-4.bin, took 0.11 s
process output shape 101/101 [ 4  4  5]
  shape 3 4 5
  shape 4 4 4
  num: 752858
saved ./cache/cubes_14_4-4-5.bin, took 0.02 s
took 7231.83 s
num total cubes: 1039496297
```

My NVMe disk was not particularly happy with
`output shape  80/101 [ 2  3  4]`, which produced a +8 GiB file at the end.
The disk throttled badly after reaching 60 °C...
But it did complete eventually at a reasonable pace, and
memory usage dropped below 7 GiB for the rest of the run.

N=15 will require more tuning of the CubeStorage read-cache and
a more parallel file system.
btrfs does not look well suited for this job,
as writing the storage files in parallel reduces the program to
near single-threaded speed.

Signed-off-by: JATothrim <jarmo.tiitto@gmail.com>
JATothrim committed Aug 26, 2023
1 parent 713b063 commit 37d51e5
Showing 2 changed files with 88 additions and 16 deletions.
78 changes: 76 additions & 2 deletions cpp/include/newCache.hpp
@@ -8,6 +8,7 @@
#include <mutex>
#include <string>
#include <thread>
#include <memory>

#include "cube.hpp"
#include "hashes.hpp"
@@ -125,6 +126,61 @@ class CubeIterator : public ICubeIterator {
const XYZ* m_ptr;
};

class CubeReadIterator : public ICubeIterator {
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = Cube;
using pointer = Cube*; // or also value_type*
using reference = Cube&; // or also value_type&

// constructor
CubeReadIterator(std::shared_ptr<mapped::file> file, uint32_t _n, mapped::seekoff_t offset) : n(_n), m_seek(offset), m_file(file) {}

// invalid iterator (can't dereference)
explicit CubeReadIterator() : n(0), m_seek(-1) {}

std::unique_ptr<ICubeIterator> clone() const override { return std::make_unique<CubeReadIterator>(*this); }

// dereference
const value_type operator*() const override { return read(); }

// pointer operator->() { return (pointer)m_seek; }

uint64_t seek() const override { return (uint64_t)m_seek; }

// Prefix increment
ICubeIterator& operator++() override {
m_seek += n * sizeof(XYZ);
return *this;
}

ICubeIterator& operator+=(int incr) override {
m_seek += n * incr * sizeof(XYZ);
return *this;
}

// Postfix increment
CubeReadIterator operator++(int) {
CubeReadIterator tmp = *this;
++(*this);
return tmp;
}

friend bool operator==(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek == b.m_seek; };
friend bool operator<(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek < b.m_seek; };
friend bool operator>(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek > b.m_seek; };
friend bool operator!=(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek != b.m_seek; };

private:
uint32_t n;
mapped::seekoff_t m_seek;
std::shared_ptr<mapped::file> m_file;

// de-reference is implemented by read()
Cube read() const;
};

/**
* To avoid complicating the use of the ICubeIterator
* CacheIterator provides type-erased wrapper that can be copied.
@@ -211,6 +267,25 @@ class ShapeRange : public IShapeRange {
XYZ shape_;
};

class FileShapeRange : public IShapeRange {
public:
FileShapeRange(std::shared_ptr<mapped::file> file, mapped::seekoff_t start, mapped::seekoff_t stop, uint64_t _cubeLen, XYZ _shape)
: b(CubeReadIterator(file, _cubeLen, start)),
e(CubeReadIterator(file, _cubeLen, stop)),
size_((stop - start) / _cubeLen), shape_(_shape) {}

CacheIterator begin() const override { return b; }
CacheIterator end() const override { return e; }

XYZ& shape() override { return shape_; }
size_t size() const override { return size_; }

private:
CacheIterator b, e;
uint64_t size_;
XYZ shape_;
};

class ICache {
public:
virtual ~ICache(){};
@@ -243,9 +318,8 @@ class CacheReader : public ICache {
std::shared_ptr<mapped::file> file_;
std::unique_ptr<const mapped::struct_region<cacheformat::Header>> header_;
std::unique_ptr<const mapped::array_region<cacheformat::ShapeEntry>> shapes_;
std::unique_ptr<const mapped::array_region<XYZ>> xyz_;

std::vector<ShapeRange> shapeRanges;
std::vector<FileShapeRange> shapeRanges;

std::string path_;
bool fileLoaded_;
26 changes: 12 additions & 14 deletions cpp/src/newCache.cpp
@@ -61,35 +61,34 @@ int CacheReader::loadFile(const std::string path) {
std::printf("warn: file size does not match expected value\n");
}

xyz_ = std::make_unique<const mapped::array_region<XYZ>>(file_, shapes_->getEndSeek(), datasize);

// Initialize shapeRanges array:
size_t offset = 0;
for (unsigned int i = 0; i < header->numShapes; ++i) {
if (shapes[i].size) {
auto index = offset / cacheformat::XYZ_SIZE;
auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE;
auto start = xyz_->get() + index;
auto end = xyz_->get() + index + num_xyz;

shapeRanges.emplace_back(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
auto start = shapes[i].offset;
auto end = start + shapes[i].size;
shapeRanges.emplace_back(file_, start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
} else {
// table entry has no data.
// shapes[i].offset may have bogus value.
shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
}

offset += shapes[i].size;
}

// Add dummy entry at back:
shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(0, 0, 0));
shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(0, 0, 0));

fileLoaded_ = true;

return 0;
}

Cube CubeReadIterator::read() const {
Cube tmp(n);
m_file->readAt(m_seek, n * sizeof(XYZ), tmp.data());
return tmp;
}


IShapeRange &CacheReader::getCubesByShape(uint32_t i) {
if (i >= header->numShapes) {
return shapeRanges.back();
@@ -102,7 +101,6 @@ void CacheReader::unload() {
// unload file from memory
if (fileLoaded_) {
shapeRanges.clear();
xyz_.reset();
shapes_.reset();
header_.reset();
file_.reset();
