Skip to content

Commit

Permalink
Remove offset from the FlatBuffers tables
Browse files Browse the repository at this point in the history
The offset being in the `table_slice` encoding-specific FlatBuffers
tables was the only reason why we required support for mutable
FlatBuffers. This commit changes `table_slice` to hold the offset
in-memory. The offset is now assigned by the importer on import and by
the segment on lookup.
  • Loading branch information
dominiklohmann committed Nov 10, 2020
1 parent 7a1f805 commit 5e0b7c3
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 53 deletions.
2 changes: 1 addition & 1 deletion libvast/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ foreach (schema ${flatbuffers_schemas})
OUTPUT ${desired_file}
COMMAND
flatbuffers::flatc -b --cpp --scoped-enums --gen-name-strings
--gen-mutable -o ${flatbuffers_output_path} ${schema}
-o ${flatbuffers_output_path} ${schema}
COMMAND ${CMAKE_COMMAND} -E rename ${output_file} ${desired_file}
COMMAND ${CMAKE_COMMAND} -P ${rename_${basename}}
DEPENDS ${schema}
Expand Down
10 changes: 0 additions & 10 deletions libvast/src/msgpack_table_slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,16 +282,6 @@ msgpack_table_slice<FlatBuffer>::columns() const noexcept {
return layout().fields.size();
}

template <class FlatBuffer>
id msgpack_table_slice<FlatBuffer>::offset() const noexcept {
return slice_.offset();
}

template <class FlatBuffer>
void msgpack_table_slice<FlatBuffer>::offset(id offset) noexcept {
const_cast<FlatBuffer&>(slice_).mutate_offset(offset);
}

// -- data access ------------------------------------------------------------

template <class FlatBuffer>
Expand Down
2 changes: 1 addition & 1 deletion libvast/src/msgpack_table_slice_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ table_slice msgpack_table_slice_builder::finish() {
reinterpret_cast<const uint8_t*>(data_.data()), data_.size());
// Create MessagePack-encoded table slices.
auto msgpack_table_slice_buffer = fbs::table_slice::msgpack::Createv0(
builder_, invalid_id, *layout_buffer, offset_table_buffer, data_buffer);
builder_, *layout_buffer, offset_table_buffer, data_buffer);
// Create and finish table slice.
auto table_slice_buffer
= fbs::CreateTableSlice(builder_, fbs::table_slice::TableSlice::msgpack_v0,
Expand Down
58 changes: 38 additions & 20 deletions libvast/src/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include "vast/bitmap.hpp"
#include "vast/bitmap_algorithms.hpp"
#include "vast/concept/printable/to_string.hpp"
#include "vast/concept/printable/vast/table_slice.hpp"
#include "vast/detail/assert.hpp"
#include "vast/detail/byte_swap.hpp"
#include "vast/detail/narrow.hpp"
Expand Down Expand Up @@ -63,13 +65,9 @@ vast::ids segment::ids() const {
vast::ids result;
auto segment = fbs::GetSegment(chunk_->data());
auto segment_v0 = segment->segment_as_v0();
for (auto flat_slice : *segment_v0->slices()) {
// TODO: Store a separate offset table in the segment header to avoid lots
// of small disk reads, and generate the resulting `ids` bitmap from the
// offset table.
auto slice = table_slice{*flat_slice, chunk_, table_slice::verify::no};
result.append_bits(false, slice.offset() - result.size());
result.append_bits(true, slice.rows());
for (auto interval : *segment_v0->ids()) {
result.append_bits(false, interval->begin() - result.size());
result.append_bits(true, interval->end() - interval->begin());
}
return result;
}
Expand All @@ -86,29 +84,49 @@ chunk_ptr segment::chunk() const {

caf::expected<std::vector<table_slice>>
segment::lookup(const vast::ids& xs) const {
// TODO: Store a separate offset table in the segment header to avoid lots of
// small disk reads, and generate the resulting id range for each table.
std::vector<table_slice> slices;
std::vector<table_slice> result;
auto f = [&](auto flat_slice) noexcept {
// TODO: Store a separate offset table in the segment header to avoid lots
// of small disk reads here. No need to slice the chunk unless we want to
// return the table slice itself.
auto slice = table_slice{*flat_slice, chunk_, table_slice::verify::no};
auto f = [&](const table_slice& slice) noexcept {
VAST_ASSERT(slice.offset() != invalid_id);
return std::pair{slice.offset(), slice.offset() + slice.rows()};
};
auto g = [&](auto flat_slice) {
auto slice = table_slice{*flat_slice, chunk_, table_slice::verify::no};
result.push_back(std::move(slice));
auto g = [&](const table_slice& slice) {
VAST_DEBUG(this, "returns slice from lookup:", to_string(slice));
result.push_back(slice);
return caf::none;
};
auto segment = fbs::GetSegment(chunk_->data());
auto segment_v0 = segment->segment_as_v0();
auto begin = segment_v0->slices()->begin();
auto end = segment_v0->slices()->end();
if (auto error = select_with(xs, begin, end, f, g))
auto segment = fbs::GetSegment(chunk_->data())->segment_as_v0();
if (!segment)
return make_error(ec::format_error, "invalid segment version");
// Assign offsets to all table slices.
if (segment->ids()->size() == 0)
return make_error(ec::lookup_error, "encountered empty segment ids range");
slices.reserve(segment->slices()->size());
auto current_interval = segment->ids()->begin();
auto current_offset = current_interval->begin();
for (auto&& flat_slice : *segment->slices()) {
auto slice = table_slice{*flat_slice, chunk_, table_slice::verify::no};
slice.offset(current_offset);
VAST_DEBUG(this, "assigns offset", current_offset, "to slice with",
slice.rows(), "rows");
current_offset += slice.rows();
if (current_offset >= current_interval->end()
&& current_interval != segment->ids()->end()) {
VAST_ASSERT(current_offset == current_interval->end());
current_offset = (++current_interval)->begin();
}
slices.push_back(std::move(slice));
}
// Select the subset of `slices` for the given ids `xs`.
if (auto error = select_with(xs, slices.begin(), slices.end(), f, g))
return error;
return result;
}

segment::segment(chunk_ptr chk) : chunk_{std::move(chk)} {
// nop
}

} // namespace vast
17 changes: 9 additions & 8 deletions libvast/src/table_slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ table_slice::table_slice(legacy_table_slice_ptr&& slice) noexcept {
}

table_slice::table_slice(const table_slice& other) noexcept
: chunk_{other.chunk_}, legacy_{other.legacy_} {
: chunk_{other.chunk_}, legacy_{other.legacy_}, offset_{other.offset_} {
// nop
}

Expand All @@ -235,6 +235,7 @@ table_slice::table_slice(const table_slice& other, enum encoding encoding,
if (encoding == other.encoding()) {
chunk_ = other.chunk_;
legacy_ = other.legacy_;
offset_ = other.offset_;
} else {
switch (encoding) {
case encoding::none:
Expand All @@ -255,12 +256,14 @@ table_slice::table_slice(const table_slice& other, enum encoding encoding,
table_slice& table_slice::operator=(const table_slice& rhs) noexcept {
chunk_ = rhs.chunk_;
legacy_ = rhs.legacy_;
offset_ = rhs.offset_;
return *this;
}

table_slice::table_slice(table_slice&& other) noexcept
: chunk_{std::exchange(other.chunk_, {})},
legacy_{std::exchange(other.legacy_, {})} {
legacy_{std::exchange(other.legacy_, {})},
offset_{std::exchange(other.offset_, invalid_id)} {
// nop
}

Expand All @@ -270,6 +273,7 @@ table_slice::table_slice(table_slice&& other, enum encoding encoding,
// If the encoding matches, we can just move the data.
chunk_ = std::exchange(other.chunk_, {});
legacy_ = std::exchange(other.legacy_, {});
offset_ = std::exchange(other.offset_, invalid_id);
} else {
// Changing the encoding requires a copy, so we just delegate to the
// copy-constructor with re-encoding.
Expand All @@ -281,6 +285,7 @@ table_slice::table_slice(table_slice&& other, enum encoding encoding,
table_slice& table_slice::operator=(table_slice&& rhs) noexcept {
chunk_ = std::exchange(rhs.chunk_, {});
legacy_ = std::exchange(rhs.legacy_, {});
offset_ = std::exchange(rhs.offset_, invalid_id);
return *this;
}

Expand Down Expand Up @@ -374,9 +379,7 @@ id table_slice::offset() const noexcept {
[&](const fbs::table_slice::legacy::v0*) noexcept {
return legacy_->offset();
},
[](const fbs::table_slice::msgpack::v0* slice) noexcept {
return msgpack_table_slice{*slice}.offset();
},
[&](const fbs::table_slice::msgpack::v0*) noexcept { return offset_; },
};
return visit(f, as_flatbuffer(chunk_));
}
Expand All @@ -391,9 +394,7 @@ void table_slice::offset(id offset) noexcept {
legacy_.unshared().offset(offset);
*this = table_slice{std::move(legacy_)};
},
[&](const fbs::table_slice::msgpack::v0* slice) noexcept {
return msgpack_table_slice{*slice}.offset(offset);
},
[&](const fbs::table_slice::msgpack::v0*) noexcept { offset_ = offset; },
};
visit(f, as_flatbuffer(chunk_));
}
Expand Down
3 changes: 0 additions & 3 deletions libvast/vast/fbs/table_slice.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ namespace vast.fbs.table_slice.msgpack;

/// A table slice encoded with MessagePack.
table v0 {
/// The offset in the 2^64 ID event space.
offset: ulong;

/// The schema of the data.
/// TODO: currently CAF binary; make this a separate table.
layout: [ubyte];
Expand Down
8 changes: 0 additions & 8 deletions libvast/vast/msgpack_table_slice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,6 @@ class msgpack_table_slice final {
/// @returns The number of columns in the slice.
table_slice::size_type columns() const noexcept;

// FIXME: Remove this when storing the offset separately from the FlatBuffer.
/// @returns The offset in the ID space.
id offset() const noexcept;

// FIXME: Remove this when storing the offset separately from the FlatBuffer.
/// Sets the offset in the ID space.
void offset(id offset) noexcept;

// -- data access ------------------------------------------------------------

/// Appends all values in column `column` to `index`.
Expand Down
3 changes: 1 addition & 2 deletions libvast/vast/table_slice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ class table_slice final {
/// @note Assigned by the importer on import and the archive on export and as
/// such not part of the FlatBuffers table. Binary representations of a table
/// slice do not contain the offset.
// FIXME: Save offset separately from other data, as it must be mutable.
// id offset_ = invalid_id;
id offset_ = invalid_id;

/// The number of in-memory table slices.
inline static std::atomic<size_t> num_instances_ = {};
Expand Down

0 comments on commit 5e0b7c3

Please sign in to comment.