From 37d51e5ee567a8c9a6cf36c0ef441aa7f342205e Mon Sep 17 00:00:00 2001
From: JATothrim <jarmo.tiitto@gmail.com>
Date: Sat, 26 Aug 2023 03:36:26 +0300
Subject: [PATCH] CacheReader: Implement cache file reading one Cube at a time

- Remove CacheReader XYZ mapping.
- Add CubeReadIterator that reads Cubes one at a time.
- FileShapeRange takes the cache file and offsets into the file
- Update CacheReader::loadFile() to initialize an array of
  FileShapeRange from the cache file.

The result is worth celebrating: N=14 was computed for the first time
with less than 9 GiB of RSS:

```
process output shape  99/101 [ 3  5  5]
  shape 2 5 5
  shape 3 4 5
  num: 588828
saved ./cache/cubes_14_3-5-5.bin, took 0.01 s
process output shape 100/101 [ 4  4  4]
  shape 3 4 4
  shape 4 4 4
  num: 3341560
saved ./cache/cubes_14_4-4-4.bin, took 0.11 s
process output shape 101/101 [ 4  4  5]
  shape 3 4 5
  shape 4 4 4
  num: 752858
saved ./cache/cubes_14_4-4-5.bin, took 0.02 s
took 7231.83 s
num total cubes: 1039496297
```

My NVMe disk was not particularly happy with
`output shape  80/101 [ 2  3  4]`, which produced an 8+ GiB file at the end.
The disk throttled badly after reaching 60 °C...
But it did complete eventually at a reasonable pace, and
memory usage dropped below 7 GiB for the rest of the run.

N=15 will require more tuning of the CubeStorage read-cache and
a more parallel file system.
btrfs does not appear to be very good at this job,
as writing the storage files in parallel reduces the program to
near single-threaded speed.

Signed-off-by: JATothrim <jarmo.tiitto@gmail.com>
---
 cpp/include/newCache.hpp | 78 ++++++++++++++++++++++++++++++++++++++--
 cpp/src/newCache.cpp     | 26 +++++++-------
 2 files changed, 88 insertions(+), 16 deletions(-)
diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp
index c24cde7..20fd660 100644
--- a/cpp/include/newCache.hpp
+++ b/cpp/include/newCache.hpp
@@ -8,6 +8,7 @@
 #include <mutex>
 #include <string>
 #include <thread>
+#include <memory>
 
 #include "cube.hpp"
 #include "hashes.hpp"
@@ -125,6 +126,61 @@ class CubeIterator : public ICubeIterator {
     const XYZ* m_ptr;
 };
 
+class CubeReadIterator : public ICubeIterator {  // iterates over Cubes stored in a cache file, addressed by file offset
+   public:
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = Cube;
+    using pointer = Cube*;    // or also value_type*
+    using reference = Cube&;  // or also value_type&
+
+    // Iterator over `file` positioned at `offset`; each step covers _n * sizeof(XYZ) bytes (one Cube).
+    CubeReadIterator(std::shared_ptr<mapped::file> file, uint32_t _n, mapped::seekoff_t offset) : n(_n), m_seek(offset), m_file(file) {}
+
+    // Invalid iterator: it has no file attached, so it must not be dereferenced.
+    explicit CubeReadIterator() : n(0), m_seek(-1) {}
+
+    std::unique_ptr<ICubeIterator> clone() const override { return std::make_unique<CubeReadIterator>(*this); }
+
+    // Dereference: returns the Cube at the current offset by value (see read()).
+    const value_type operator*() const override { return read(); }
+
+    // pointer operator->() { return (pointer)m_seek; }
+
+    uint64_t seek() const override { return (uint64_t)m_seek; }
+
+    // Prefix increment: step forward by exactly one Cube.
+    ICubeIterator& operator++() override {
+        m_seek += n * sizeof(XYZ);
+        return *this;
+    }
+    // Advance by incr Cubes. sizeof(XYZ) is the first factor so the whole product is computed in size_t, not in 32 bits.
+    ICubeIterator& operator+=(int incr) override {
+        m_seek += sizeof(XYZ) * n * incr;
+        return *this;
+    }
+
+    // Postfix increment: returns a copy of the iterator as it was before the step.
+    CubeReadIterator operator++(int) {
+        CubeReadIterator tmp = *this;
+        ++(*this);
+        return tmp;
+    }
+    // Comparisons look only at the offset; the file and the Cube size are not compared.
+    friend bool operator==(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek == b.m_seek; };
+    friend bool operator<(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek < b.m_seek; };
+    friend bool operator>(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek > b.m_seek; };
+    friend bool operator!=(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek != b.m_seek; };
+
+   private:
+    uint32_t n;                            // number of XYZ entries per Cube
+    mapped::seekoff_t m_seek;              // offset of the current Cube within m_file
+    std::shared_ptr<mapped::file> m_file;  // file the Cubes are read from; null for the invalid iterator
+
+    // Dereference is implemented by read(), which is defined out of line.
+    Cube read() const;
+};
+
 /**
  * To avoid complicating the use of the ICubeIterator
  * CacheIterator provides type-erased wrapper that can be copied.
@@ -211,6 +267,25 @@ class ShapeRange : public IShapeRange {
     XYZ shape_;
 };
 
+class FileShapeRange : public IShapeRange {  // the Cubes of one shape, stored between offsets [start, stop) of a cache file
+   public:
+    FileShapeRange(std::shared_ptr<mapped::file> file, mapped::seekoff_t start, mapped::seekoff_t stop, uint64_t _cubeLen, XYZ _shape)
+        : b(CubeReadIterator(file, _cubeLen, start)),  // iterator at the first Cube (offset start)
+        e(CubeReadIterator(file, _cubeLen, stop)),     // past-the-end iterator (offset stop)
+        size_((stop - start) / (_cubeLen * sizeof(XYZ))), shape_(_shape) {}  // byte length / bytes per Cube = number of Cubes
+    // begin()/end() return copies of the stored iterators.
+    CacheIterator begin() const override { return b; }
+    CacheIterator end() const override { return e; }
+
+    XYZ& shape() override { return shape_; }
+    size_t size() const override { return size_; }
+
+   private:
+    CacheIterator b, e;
+    uint64_t size_;  // number of Cubes in the range
+    XYZ shape_;
+};
+
 class ICache {
    public:
     virtual ~ICache(){};
@@ -243,9 +318,8 @@ class CacheReader : public ICache {
     std::shared_ptr<mapped::file> file_;
     std::unique_ptr<const mapped::struct_region<cacheformat::Header>> header_;
     std::unique_ptr<const mapped::array_region<cacheformat::ShapeEntry>> shapes_;
-    std::unique_ptr<const mapped::array_region<XYZ>> xyz_;
 
-    std::vector<ShapeRange> shapeRanges;
+    std::vector<FileShapeRange> shapeRanges;
 
     std::string path_;
     bool fileLoaded_;
diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp
index e2d7800..9e0c54e 100644
--- a/cpp/src/newCache.cpp
+++ b/cpp/src/newCache.cpp
@@ -61,35 +61,34 @@ int CacheReader::loadFile(const std::string path) {
         std::printf("warn: file size does not match expected value\n");
     }
 
-    xyz_ = std::make_unique<const mapped::array_region<XYZ>>(file_, shapes_->getEndSeek(), datasize);
-
     // Initialize shapeRanges array:
-    size_t offset = 0;
     for (unsigned int i = 0; i < header->numShapes; ++i) {
         if (shapes[i].size) {
-            auto index = offset / cacheformat::XYZ_SIZE;
-            auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE;
-            auto start = xyz_->get() + index;
-            auto end = xyz_->get() + index + num_xyz;
-
-            shapeRanges.emplace_back(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
+            auto start = shapes[i].offset;
+            auto end = start + shapes[i].size;
+            shapeRanges.emplace_back(file_, start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
         } else {
             // table entry has no data.
             // shapes[i].offset may have bogus value.
-            shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
+            shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
         }
-
-        offset += shapes[i].size;
     }
 
     // Add dummy entry at back:
-    shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(0, 0, 0));
+    shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(0, 0, 0));
 
     fileLoaded_ = true;
 
     return 0;
 }
 
+Cube CubeReadIterator::read() const {  // reads the Cube at the current file offset and returns it by value
+    Cube tmp(n);  // presumably allocates room for n XYZ entries — confirm against Cube's constructor
+    m_file->readAt(m_seek, n * sizeof(XYZ), tmp.data());  // NOTE(review): any result of readAt is ignored, and m_file is null for a default-constructed iterator
+    return tmp;
+}
+
+
 IShapeRange &CacheReader::getCubesByShape(uint32_t i) {
     if (i >= header->numShapes) {
         return shapeRanges.back();
@@ -102,7 +101,6 @@ void CacheReader::unload() {
     // unload file from memory
     if (fileLoaded_) {
         shapeRanges.clear();
-        xyz_.reset();
         shapes_.reset();
         header_.reset();
         file_.reset();