Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix duplicate output column names of join #4965

Merged
merged 2 commits into from
Dec 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions src/graph/executor/query/InnerJoinExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,6 @@ void InnerJoinExecutor::buildNewRow(const std::unordered_map<T, std::vector<cons
}
}

Row InnerJoinExecutor::newRow(Row left, Row right) const {
Row r;
r.reserve(left.size() + right.size());
r.values.insert(r.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
r.values.insert(r.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
return r;
}

const std::string& InnerJoinExecutor::leftVar() const {
if (node_->kind() == PlanNode::Kind::kBiInnerJoin) {
return node_->asNode<BiJoin>()->leftInputVar();
Expand Down
3 changes: 0 additions & 3 deletions src/graph/executor/query/InnerJoinExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ class InnerJoinExecutor : public JoinExecutor {
Row rRow,
DataSet& ds) const;

// concat rows
Row newRow(Row left, Row right) const;

const std::string& leftVar() const;

const std::string& rightVar() const;
Expand Down
43 changes: 34 additions & 9 deletions src/graph/executor/query/JoinExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@ Status JoinExecutor::checkBiInputDataSets() {
return Status::Error(ss.str());
}
colSize_ = join->colNames().size();

auto& lhsResult = ectx_->getResult(join->leftInputVar());
auto& lhsColNames = lhsResult.valuePtr()->getDataSet().colNames;
auto lhsColSize = lhsColNames.size();

auto& rhsResult = ectx_->getResult(join->rightInputVar());
auto& rhsColNames = rhsResult.valuePtr()->getDataSet().colNames;
auto rhsColSize = rhsColNames.size();

DCHECK_LE(colSize_, lhsColSize + rhsColSize);
if (colSize_ < lhsColSize + rhsColSize) {
for (size_t i = 0; i < rhsColSize; ++i) {
auto it = std::find(lhsColNames.begin(), lhsColNames.end(), rhsColNames[i]);
if (it == lhsColNames.end()) {
rhsOutputColIdxs_.push_back(i);
}
}
}

return Status::OK();
}

Expand Down Expand Up @@ -83,15 +102,21 @@ void JoinExecutor::buildSingleKeyHashTable(
}

Row JoinExecutor::newRow(Row left, Row right) const {
Row r;
r.reserve(left.size() + right.size());
r.values.insert(r.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
r.values.insert(r.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
return r;
Row row;
row.reserve(left.size() + right.size());
row.values.insert(row.values.end(),
std::make_move_iterator(left.values.begin()),
std::make_move_iterator(left.values.end()));
if (!rhsOutputColIdxs_.empty()) {
for (auto idx : rhsOutputColIdxs_) {
row.values.emplace_back(std::move(right.values[idx]));
}
} else {
jievince marked this conversation as resolved.
Show resolved Hide resolved
row.values.insert(row.values.end(),
std::make_move_iterator(right.values.begin()),
std::make_move_iterator(right.values.end()));
}
return row;
}

} // namespace graph
Expand Down
6 changes: 6 additions & 0 deletions src/graph/executor/query/JoinExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ class JoinExecutor : public Executor {

std::unique_ptr<Iterator> lhsIter_;
std::unique_ptr<Iterator> rhsIter_;
// colSize_ is the size of the output columns of join executor.
// If the join is natural join, which means the left and right columns have duplicated names,
// the output columns will be a intersection of the left and right columns.
size_t colSize_{0};
// If the join is natural join, rhsOutputColIdxs_ will be used to record the output column index
// of the right. If not, rhsOutputColIdxs_ will be empty.
std::vector<size_t> rhsOutputColIdxs_;
std::unordered_map<Value, std::vector<const Row*>> hashTable_;
std::unordered_map<List, std::vector<const Row*>> listHashTable_;
};
Expand Down
164 changes: 164 additions & 0 deletions src/graph/executor/test/JoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "graph/executor/query/InnerJoinExecutor.h"
#include "graph/executor/query/LeftJoinExecutor.h"
#include "graph/executor/test/QueryTestBase.h"
#include "graph/planner/plan/Logic.h"
#include "graph/planner/plan/Query.h"

namespace nebula {
Expand Down Expand Up @@ -68,6 +69,43 @@ class JoinTest : public QueryTestBase {
qctx_->symTable()->newVariable("empty_var2");
qctx_->ectx()->setResult("empty_var2", ResultBuilder().value(Value(std::move(ds))).build());
}
{
DataSet ds;
ds.colNames = {"v1", "e1", "v2", "v3"};
Row row1;
row1.values.emplace_back(1);
row1.values.emplace_back(2);
row1.values.emplace_back(3);
row1.values.emplace_back(4);
ds.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(11);
row2.values.emplace_back(12);
row2.values.emplace_back(3);
row2.values.emplace_back(4);
ds.rows.emplace_back(std::move(row2));

qctx_->symTable()->newVariable("var4");
qctx_->ectx()->setResult("var4", ResultBuilder().value(Value(std::move(ds))).build());
}
{
DataSet ds;
ds.colNames = {"v2", "e2", "v3"};
Row row1;
row1.values.emplace_back(3);
row1.values.emplace_back(0);
row1.values.emplace_back(4);
ds.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(3);
row2.values.emplace_back(0);
row2.values.emplace_back(5);
ds.rows.emplace_back(std::move(row2));
qctx_->symTable()->newVariable("var5");
qctx_->ectx()->setResult("var5", ResultBuilder().value(Value(std::move(ds))).build());
}
}

void testInnerJoin(std::string left, std::string right, DataSet& expected, int64_t line);
Expand Down Expand Up @@ -171,6 +209,65 @@ TEST_F(JoinTest, InnerJoin) {
testInnerJoin("var2", "var1", expected, __LINE__);
}

TEST_F(JoinTest, BiInnerJoin) {
DataSet expected;
expected.colNames = {"v1", "e1", "v2", "v3", "e2"};
Row row1;
row1.values.emplace_back(1);
row1.values.emplace_back(2);
row1.values.emplace_back(3);
row1.values.emplace_back(4);
row1.values.emplace_back(0);
expected.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(11);
row2.values.emplace_back(12);
row2.values.emplace_back(3);
row2.values.emplace_back(4);
row2.values.emplace_back(0);
expected.rows.emplace_back(std::move(row2));

// $var4 inner join $var5 on $var4.v2 = $var5.v2, $var4.v3 = $var4.v3
auto key1 = VariablePropertyExpression::make(pool_, "var4", "v2");
auto key2 = VariablePropertyExpression::make(pool_, "var4", "v3");
std::vector<Expression*> hashKeys = {key1, key2};
auto probe1 = VariablePropertyExpression::make(pool_, "var5", "v2");
auto probe2 = VariablePropertyExpression::make(pool_, "var5", "v3");
std::vector<Expression*> probeKeys = {probe1, probe2};

auto lhs = Project::make(qctx_.get(), nullptr, nullptr);
lhs->setOutputVar("var4");
lhs->setColNames({"v1", "e1", "v2", "v3"});
auto rhs = Project::make(qctx_.get(), nullptr, nullptr);
rhs->setOutputVar("var5");
rhs->setColNames({"v2", "e2", "v3"});

auto* join = BiInnerJoin::make(qctx_.get(), lhs, rhs, std::move(hashKeys), std::move(probeKeys));

auto joinExe = std::make_unique<BiInnerJoinExecutor>(join, qctx_.get());
auto future = joinExe->execute();
auto status = std::move(future).get();
EXPECT_TRUE(status.ok());
auto& result = qctx_->ectx()->getResult(join->outputVar());

DataSet resultDs;
resultDs.colNames = {"v1", "e1", "v2", "v3", "e2"};
auto iter = result.iter();
for (; iter->valid(); iter->next()) {
const auto& cols = *iter->row();
Row row;
for (size_t i = 0; i < cols.size(); ++i) {
Value col = cols[i];
row.values.emplace_back(std::move(col));
}
resultDs.rows.emplace_back(std::move(row));
}

EXPECT_EQ(resultDs, expected);
EXPECT_EQ(result.state(), Result::State::kSuccess);
}

TEST_F(JoinTest, InnerJoinTwice) {
std::string joinOutput;
{
Expand Down Expand Up @@ -283,6 +380,73 @@ TEST_F(JoinTest, LeftJoin) {
testLeftJoin("var1", "var2", expected, __LINE__);
}

TEST_F(JoinTest, BiLeftJoin) {
DataSet expected;
expected.colNames = {"v2", "e2", "v3", "v1", "e1"};
Row row1;
row1.values.emplace_back(3);
row1.values.emplace_back(0);
row1.values.emplace_back(4);
row1.values.emplace_back(1);
row1.values.emplace_back(2);
expected.rows.emplace_back(std::move(row1));

Row row2;
row2.values.emplace_back(3);
row2.values.emplace_back(0);
row2.values.emplace_back(4);
row2.values.emplace_back(11);
row2.values.emplace_back(12);
expected.rows.emplace_back(std::move(row2));

Row row3;
row3.values.emplace_back(3);
row3.values.emplace_back(0);
row3.values.emplace_back(5);
row3.values.emplace_back(Value::kNullValue);
row3.values.emplace_back(Value::kNullValue);
expected.rows.emplace_back(std::move(row3));

// $var5 left join $var4 on $var5.v2 = $var4.v2, $var5.v3 = $var4.v3
auto key1 = VariablePropertyExpression::make(pool_, "var5", "v2");
auto key2 = VariablePropertyExpression::make(pool_, "var5", "v3");
std::vector<Expression*> hashKeys = {key1, key2};
auto probe1 = VariablePropertyExpression::make(pool_, "var4", "v2");
auto probe2 = VariablePropertyExpression::make(pool_, "var4", "v3");
std::vector<Expression*> probeKeys = {probe1, probe2};

auto lhs = Project::make(qctx_.get(), nullptr, nullptr);
lhs->setOutputVar("var5");
lhs->setColNames({"v2", "e2", "v3"});
auto rhs = Project::make(qctx_.get(), nullptr, nullptr);
rhs->setOutputVar("var4");
rhs->setColNames({"v1", "e1", "v2", "v3"});

auto* join = BiLeftJoin::make(qctx_.get(), lhs, rhs, std::move(hashKeys), std::move(probeKeys));

auto joinExe = std::make_unique<BiLeftJoinExecutor>(join, qctx_.get());
auto future = joinExe->execute();
auto status = std::move(future).get();
EXPECT_TRUE(status.ok());
auto& result = qctx_->ectx()->getResult(join->outputVar());

DataSet resultDs;
resultDs.colNames = {"v2", "e2", "v3", "v1", "e1"};
auto iter = result.iter();
for (; iter->valid(); iter->next()) {
const auto& cols = *iter->row();
Row row;
for (size_t i = 0; i < cols.size(); ++i) {
Value col = cols[i];
row.values.emplace_back(std::move(col));
}
resultDs.rows.emplace_back(std::move(row));
}

EXPECT_EQ(resultDs, expected);
EXPECT_EQ(result.state(), Result::State::kSuccess);
}

TEST_F(JoinTest, LeftJoinTwice) {
std::string joinOutput;
{
Expand Down
8 changes: 6 additions & 2 deletions src/graph/planner/plan/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,8 +867,12 @@ BiJoin::BiJoin(QueryContext* qctx,
hashKeys_(std::move(hashKeys)),
probeKeys_(std::move(probeKeys)) {
auto lColNames = left->colNames();
auto rColNames = right->colNames();
lColNames.insert(lColNames.end(), rColNames.begin(), rColNames.end());
auto& rColNames = right->colNames();
for (auto& rColName : rColNames) {
if (std::find(lColNames.begin(), lColNames.end(), rColName) == lColNames.end()) {
lColNames.emplace_back(rColName);
}
}
setColNames(lColNames);
}

Expand Down
67 changes: 67 additions & 0 deletions tests/tck/features/match/MultiQueryParts.feature
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,75 @@ Feature: Multi Query Parts
| o.player.name |
| "Tim Duncan" |
| "Tim Duncan" |
# Both the 2 match statement contains variable v1 and v4
When profiling query:
"""
MATCH (v1:player)-[:like*2..2]->(v2)-[e3:like]->(v4) where id(v1) == "Tony Parker"
MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili") | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan" ) | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan" ) | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
# The redudant Project after BiInnerJoin is removed now
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
| 19 | BiInnerJoin | 7,14 | | |
| 7 | Project | 6 | | |
| 6 | AppendVertices | 5 | | |
| 5 | Traverse | 20 | | |
| 20 | Traverse | 2 | | |
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
| 10 | Start | | | |

Scenario: Optional Match
When profiling query:
"""
MATCH (v1:player)-[:like*2..2]->(v2)-[e3:like]->(v4) where id(v1) == "Tony Parker"
OPTIONAL MATCH (v3:player)-[:like]->(v1)<-[e5]-(v4) where id(v3) == "Tim Duncan" return *
"""
Then the result should be, in any order, with relax comparison:
| v1 | v2 | e3 | v4 | v3 | e5 |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"LaMarcus Aldridge" @0 {likeness: 90}] | ("LaMarcus Aldridge") | ("Tim Duncan") | [:like "LaMarcus Aldridge"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Tony Parker" @0 {likeness: 95}] | ("Tony Parker") | __NULL__ | __NULL__ |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Tim Duncan" @0 {likeness: 95}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | [:like "Manu Ginobili"->"Tim Duncan" @0 {likeness: 90}] | ("Tim Duncan") | ("Tim Duncan") | [:teammate "Tim Duncan"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tony Parker") | [:like "Tony Parker"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
| ("Tony Parker") | ("Tim Duncan") | [:like "Tim Duncan"->"Manu Ginobili" @0 {likeness: 95}] | ("Manu Ginobili") | ("Tim Duncan") | [:teammate "Manu Ginobili"->"Tony Parker" @0 {}] |
# The redudant Project after BiLeftJoin is removed now
And the execution plan should be:
| id | name | dependencies | profiling data | operator info |
| 19 | BiLeftJoin | 7,14 | | |
| 7 | Project | 6 | | |
| 6 | AppendVertices | 5 | | |
| 5 | Traverse | 20 | | |
| 20 | Traverse | 2 | | |
| 2 | Dedup | 1 | | |
| 1 | PassThrough | 3 | | |
| 3 | Start | | | |
| 14 | Project | 13 | | |
| 13 | AppendVertices | 12 | | |
| 12 | Traverse | 21 | | |
| 21 | Traverse | 9 | | |
| 9 | Dedup | 8 | | |
| 8 | PassThrough | 10 | | |
| 10 | Start | | | |
When executing query:
"""
MATCH (m)-[]-(n) WHERE id(m)=="Tim Duncan"
Expand Down