Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multiple match joined on edge produce wrong result #4923

Merged
merged 12 commits into from
Nov 24, 2022
26 changes: 26 additions & 0 deletions src/common/datatypes/Edge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* This source code is licensed under Apache 2.0 License.
*/

#include <common/base/Logging.h>
#include <common/datatypes/Edge.h>
#include <folly/String.h>
#include <folly/hash/Hash.h>
Expand Down Expand Up @@ -141,6 +142,31 @@ bool Edge::keyEqual(const Edge& rhs) const {
return src == rhs.dst && dst == rhs.src && ranking == rhs.ranking;
}

std::string Edge::id() const {
std::string s;
if (src.type() == Value::Type::INT) {
EdgeType t = type > 0 ? type : -type;
const int64_t& srcId = type > 0 ? src.getInt() : dst.getInt();
const int64_t& dstId = type > 0 ? dst.getInt() : src.getInt();
s.reserve(sizeof(srcId) + sizeof(dstId) + sizeof(type) + sizeof(ranking));
s.append(reinterpret_cast<const char*>(&srcId), sizeof(srcId));
s.append(reinterpret_cast<const char*>(&dstId), sizeof(dstId));
s.append(reinterpret_cast<const char*>(&t), sizeof(t));
s.append(reinterpret_cast<const char*>(&ranking), sizeof(ranking));
} else {
DCHECK(src.type() == Value::Type::STRING);
EdgeType t = type > 0 ? type : -type;
const std::string& srcId = type > 0 ? src.getStr() : dst.getStr();
const std::string& dstId = type > 0 ? dst.getStr() : src.getStr();
s.reserve(srcId.size() + dstId.size() + sizeof(t) + sizeof(ranking));
s.append(srcId.data(), srcId.size());
s.append(dstId.data(), dstId.size());
s.append(reinterpret_cast<const char*>(&t), sizeof(t));
s.append(reinterpret_cast<const char*>(&ranking), sizeof(ranking));
}
return s;
}

} // namespace nebula

namespace std {
Expand Down
3 changes: 3 additions & 0 deletions src/common/datatypes/Edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ struct Edge {
const Value& value(const std::string& key) const;

bool keyEqual(const Edge& rhs) const;

// Return this edge's id encoded in string
std::string id() const;
};

inline std::ostream& operator<<(std::ostream& os, const Edge& v) {
Expand Down
39 changes: 39 additions & 0 deletions src/common/datatypes/test/EdgeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,43 @@ TEST(Edge, hashEdge) {
EXPECT_NE(edge1, edge5);
}

TEST(Edge, id) {
{
Edge edge1(0, 1, 1, "like", 100, {});

Edge edge2(0, 1, 1, "like", 100, {});
EXPECT_EQ(edge1.id(), edge2.id());

Edge edge3(1, 1, 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge3.id());

Edge edge4(0, 2, 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge4.id());

Edge edge5(0, 1, -1, "like", 100, {});
EXPECT_NE(edge1.id(), edge5.id());

Edge edge6(0, 1, 1, "like", 101, {});
EXPECT_NE(edge1.id(), edge6.id());
}
{
Edge edge1("aaa", "bbb", 1, "like", 100, {});

Edge edge2("aaa", "bbb", 1, "like", 100, {});
EXPECT_EQ(edge1.id(), edge2.id());

Edge edge3("aab", "bbb", 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge3.id());

Edge edge4("aaa", "bba", 1, "like", 100, {});
EXPECT_NE(edge1.id(), edge4.id());

Edge edge5("aaa", "bbb", 2, "like", 100, {});
EXPECT_NE(edge1.id(), edge5.id());

Edge edge6("aaa", "bbb", 1, "like", 99, {});
EXPECT_NE(edge1.id(), edge6.id());
}
}

} // namespace nebula
27 changes: 27 additions & 0 deletions src/common/function/FunctionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,33 @@ FunctionManager::FunctionManager() {
}
};
}
{
auto &attr = functions_["_joinkey"];
attr.minArity_ = 1;
attr.maxArity_ = 1;
attr.isAlwaysPure_ = true;
attr.body_ = [](const auto &args) -> Value {
const Value &value = args[0].get();
switch (value.type()) {
case Value::Type::NULLVALUE: {
return Value::kNullValue;
}
case Value::Type::VERTEX: {
return value.getVertex().vid;
}
// NOTE:
// id() on Edge is designed to be used get a Join key when
// Join operator performed on edge, the returned id is a
// string encoded the {src, dst, type, ranking} tuple
case Value::Type::EDGE: {
codesigner marked this conversation as resolved.
Show resolved Hide resolved
return value.getEdge().id();
}
default: {
return Value::kNullBadType;
}
}
};
}
{
auto &attr = functions_["tags"];
attr.minArity_ = 1;
Expand Down
2 changes: 1 addition & 1 deletion src/graph/context/ast/CypherAstContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct EdgeInfo {
Expression* filter{nullptr};
};

enum class AliasType : int8_t { kNode, kEdge, kPath, kDefault };
enum class AliasType : int8_t { kNode, kEdge, kPath, kEdgeList, kDefault };

struct ScanInfo {
Expression* filter{nullptr};
Expand Down
2 changes: 1 addition & 1 deletion src/graph/planner/match/MatchPathPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ Status MatchPathPlanner::leftExpandFromNode(
auto* pool = qctx->objPool();
auto args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, nodeInfos[startIndex].alias));
nextTraverseStart = FunctionCallExpression::make(pool, "id", args);
nextTraverseStart = FunctionCallExpression::make(pool, "_joinkey", args);
}
bool reversely = true;
for (size_t i = startIndex; i > 0; --i) {
Expand Down
15 changes: 14 additions & 1 deletion src/graph/planner/match/MatchPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,20 @@ Status MatchPlanner::connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* ma
}
std::unordered_set<std::string> intersectedAliases;
for (auto& alias : matchCtx->aliasesGenerated) {
if (matchCtx->aliasesAvailable.find(alias.first) != matchCtx->aliasesAvailable.end()) {
auto it = matchCtx->aliasesAvailable.find(alias.first);
if (it != matchCtx->aliasesAvailable.end()) {
// Joined type should be same
if (it->second != alias.second) {
return Status::SemanticError(fmt::format("{} binding to different type: {} vs {}",
alias.first,
AliasTypeName[static_cast<int>(alias.second)],
AliasTypeName[static_cast<int>(it->second)]));
}
// Joined On EdgeList is not supported
if (alias.second == AliasType::kEdgeList) {
return Status::SemanticError(alias.first +
" defined with type EdgeList, which cannot be joined on");
}
intersectedAliases.insert(alias.first);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/graph/planner/match/MatchPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class MatchPlanner final : public Planner {
StatusOr<SubPlan> transform(AstContext* astCtx) override;

private:
static constexpr std::array AliasTypeName = {"Node", "Edge", "Path", "EdgeList", "Default"};
bool tailConnected_{false};
StatusOr<SubPlan> genPlan(CypherClauseContextBase* clauseCtx);
Status connectMatchPlan(SubPlan& queryPlan, MatchClauseContext* matchCtx);
Expand Down
4 changes: 2 additions & 2 deletions src/graph/planner/match/SegmentsConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SubPlan SegmentsConnector::innerJoin(QueryContext* qctx,
for (auto& alias : intersectedAliases) {
auto* args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, alias));
auto* expr = FunctionCallExpression::make(pool, "id", args);
auto* expr = FunctionCallExpression::make(pool, "_joinkey", args);
hashKeys.emplace_back(expr);
probeKeys.emplace_back(expr->clone());
}
Expand All @@ -46,7 +46,7 @@ SubPlan SegmentsConnector::leftJoin(QueryContext* qctx,
for (auto& alias : intersectedAliases) {
auto* args = ArgumentList::make(pool);
args->addArgument(InputPropertyExpression::make(pool, alias));
auto* expr = FunctionCallExpression::make(pool, "id", args);
auto* expr = FunctionCallExpression::make(pool, "_joinkey", args);
hashKeys.emplace_back(expr);
probeKeys.emplace_back(expr->clone());
}
Expand Down
25 changes: 22 additions & 3 deletions src/graph/validator/MatchValidator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,16 +283,21 @@ Status MatchValidator::buildEdgeInfo(const MatchPath *path,
edgeInfos[i].types.emplace_back(typeName.value());
}
}
AliasType aliasType = AliasType::kEdge;
auto *stepRange = edge->range();
if (stepRange != nullptr) {
NG_RETURN_IF_ERROR(validateStepRange(stepRange));
edgeInfos[i].range = stepRange;
// Type of [e*1..2], [e*2] should be inference to EdgeList
if (stepRange->max() > stepRange->min() || stepRange->min() > 1) {
aliasType = AliasType::kEdgeList;
}
}
if (alias.empty()) {
anonymous = true;
alias = vctx_->anonVarGen()->getVar();
} else {
if (!aliases.emplace(alias, AliasType::kEdge).second) {
if (!aliases.emplace(alias, aliasType).second) {
return Status::SemanticError("`%s': Redefined alias", alias.c_str());
}
}
Expand Down Expand Up @@ -601,14 +606,28 @@ Status MatchValidator::validateUnwind(const UnwindClause *unwindClause,
}

auto labelExprs = ExpressionUtils::collectAll(unwindCtx.unwindExpr, {Expression::Kind::kLabel});
std::vector<AliasType> types;
for (auto *labelExpr : labelExprs) {
DCHECK_EQ(labelExpr->kind(), Expression::Kind::kLabel);
auto label = static_cast<const LabelExpression *>(labelExpr)->name();
if (!unwindCtx.aliasesAvailable.count(label)) {
auto it = unwindCtx.aliasesAvailable.find(label);
if (it == unwindCtx.aliasesAvailable.end()) {
return Status::SemanticError("Variable `%s` not defined", label.c_str());
}
types.push_back(it->second);
}
// UNWIND Type Inference:
// Example: UNWIND x,y AS z
// if x,y have same type
// set z to the same type
// else
// set z to default
AliasType aliasType = AliasType::kDefault;
if (types.size() > 0 &&
std::adjacent_find(types.begin(), types.end(), std::not_equal_to<>()) == types.end()) {
aliasType = types[0];
}
unwindCtx.aliasesGenerated.emplace(unwindCtx.alias, AliasType::kDefault);
unwindCtx.aliasesGenerated.emplace(unwindCtx.alias, aliasType);
if (unwindCtx.aliasesAvailable.count(unwindCtx.alias) > 0) {
return Status::SemanticError("Variable `%s` already declared", unwindCtx.alias.c_str());
}
Expand Down
32 changes: 32 additions & 0 deletions tests/tck/features/bugfix/MatchJoinOnEdge.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License.
Feature: Test match used in pipe

Background:
Given a graph with space named "nba"

Scenario: Multiple Match joined on edge
When executing query:
"""
MATCH (v:player)-[e:like*1..2]->(u) WHERE v.player.name=="Tim Duncan" MATCH (vv:player)-[e:like]->() WHERE vv.player.name=="Tony Parker"return v, u
"""
Then a SemanticError should be raised at runtime: e binding to different type: Edge vs EdgeList
When executing query:
"""
MATCH (v:player)-[e:like*2]->(u) WHERE v.player.name=="Tim Duncan" MATCH (vv:player)-[e:like]->() WHERE vv.player.name=="Tony Parker"return v, u
"""
Then a SemanticError should be raised at runtime: e binding to different type: Edge vs EdgeList
When executing query:
"""
MATCH (v:player)-[e:like]->() WHERE v.player.name=="Tim Duncan" MATCH (u:player)-[e:like]->() WHERE u.player.name=="Tony Parker"return v
"""
Then the result should be, in any order:
| v |
When executing query:
"""
MATCH (v:player)-[e:like]->() WHERE v.player.name=="Tim Duncan" MATCH ()-[e:like]->(u:player) WHERE u.player.name=="Tony Parker"return v, u
"""
Then the result should be, in any order:
| v | u |
| ("Tim Duncan" :player{age: 42, name: "Tim Duncan"} :bachelor{name: "Tim Duncan", speciality: "psychology"}) | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) |