Skip to content

Commit

Permalink
Fix raw bytes read in DirectBufferedInput (#10266)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #10266

The calculations of raw bytes read and storage read bytes are wrong in `DirectBufferedInput`

Reviewed By: spershin

Differential Revision: D58818252

fbshipit-source-id: 2b8bc6032603ca055dfbd81a3b7b5c9cb9e42a9b
  • Loading branch information
Yuhta authored and facebook-github-bot committed Jun 20, 2024
1 parent 97cdc63 commit a605b90
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
6 changes: 3 additions & 3 deletions velox/dwio/common/DirectBufferedInput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,13 +281,13 @@ std::vector<cache::CachePin> DirectCoalescedLoad::loadData(bool prefetch) {
input_->read(buffers, requests_[0].region.offset, LogType::FILE);
}

ioStats_->read().increment(size);
ioStats_->incRawBytesRead(size - overread);
ioStats_->read().increment(size + overread);
ioStats_->incRawBytesRead(size);
ioStats_->incTotalScanTime(usecs * 1'000);
ioStats_->queryThreadIoLatency().increment(usecs);
ioStats_->incRawOverreadBytes(overread);
if (prefetch) {
ioStats_->prefetch().increment(size);
ioStats_->prefetch().increment(size + overread);
}
return {};
}
Expand Down
23 changes: 17 additions & 6 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,19 @@ TEST_F(TableScanTest, allColumns) {
}

TEST_F(TableScanTest, directBufferInputRawInputBytes) {
auto vectors = makeVectors(10, 1'000);
constexpr int kSize = 10;
auto vector = makeRowVector({
makeFlatVector<int64_t>(kSize, folly::identity),
makeFlatVector<int64_t>(kSize, folly::identity),
makeFlatVector<int64_t>(kSize, folly::identity),
});
auto filePath = TempFilePath::create();
writeToFile(filePath->getPath(), vectors);
createDuckDbTable(vectors);
createDuckDbTable({vector});
writeToFile(filePath->getPath(), {vector});

auto plan = PlanBuilder(pool_.get())
.startTableScan()
.outputType(rowType_)
.outputType(ROW({"c0", "c2"}, {BIGINT(), BIGINT()}))
.endTableScan()
.planNode();

Expand All @@ -299,15 +304,21 @@ TEST_F(TableScanTest, directBufferInputRawInputBytes) {
.plan(plan)
.splits(makeHiveConnectorSplits({filePath}))
.queryCtx(queryCtx)
.assertResults("SELECT * FROM tmp");
.assertResults("SELECT c0, c2 FROM tmp");

// A quick sanity check for memory usage reporting. Check that peak total
// memory usage for the project node is > 0.
auto planStats = toPlanStats(task->taskStats());
auto scanNodeId = plan->id();
auto it = planStats.find(scanNodeId);
ASSERT_TRUE(it != planStats.end());
ASSERT_GT(it->second.rawInputBytes, 0);
auto rawInputBytes = it->second.rawInputBytes;
auto overreadBytes = getTableScanRuntimeStats(task).at("overreadBytes").sum;
ASSERT_EQ(rawInputBytes, 26);
ASSERT_EQ(overreadBytes, 13);
ASSERT_EQ(
getTableScanRuntimeStats(task).at("storageReadBytes").sum,
rawInputBytes + overreadBytes);
EXPECT_GT(getTableScanRuntimeStats(task)["totalScanTime"].sum, 0);
EXPECT_GT(getTableScanRuntimeStats(task)["queryThreadIoLatency"].sum, 0);
}
Expand Down

0 comments on commit a605b90

Please sign in to comment.