diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index fb614a7b022..974509d8d35 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -170,6 +170,11 @@ jobs: make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} tck working-directory: tests/ timeout-minutes: 60 + - name: LDBC + run: | + make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} ldbc + working-directory: tests/ + timeout-minutes: 60 - name: Down cluster run: | make RM_DIR=false down diff --git a/tests/Makefile b/tests/Makefile index 5acb5635fb8..0275c0f51f0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,7 +2,7 @@ # # This source code is licensed under Apache 2.0 License. -.PHONY: fmt check check-and-diff init init-all clean test tck fail up down test-all +.PHONY: fmt check check-and-diff init init-all clean test tck fail up down test-all ldbc PYPI_MIRROR = https://mirrors.aliyun.com/pypi/simple/ # PYPI_MIRROR = http://pypi.mirrors.ustc.edu.cn/simple --trusted-host pypi.mirrors.ustc.edu.cn @@ -95,7 +95,10 @@ slow-query: currdir tck: jobs slow-query $(test_j) tck/steps/test_tck.py -test-all: test tck +ldbc: currdir + $(test_j) tck/steps/test_ldbc.py + +test-all: test tck ldbc fail: currdir python3 -m pytest \ diff --git a/tests/common/utils.py b/tests/common/utils.py index 041ea0971e8..78c7ac99ffe 100644 --- a/tests/common/utils.py +++ b/tests/common/utils.py @@ -432,8 +432,9 @@ def load_csv_data( # wait heartbeat_interval_secs + 1 seconds for schema synchronization time.sleep(2) - for fd in config["files"]: - _load_data_from_file(sess, data_dir, fd) + if config["files"] is not None: + for fd in config["files"]: + _load_data_from_file(sess, data_dir, fd) return space_desc diff --git a/tests/conftest.py b/tests/conftest.py index 7802f31238d..4f79d1de866 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -216,6 +216,11 @@ def load_nba_int_vid_data(): yield load_csv_data_once("nba_int_vid") 
+@pytest.fixture(scope="session") +def load_ldbc_v0_3_3(): + yield load_csv_data_once("ldbc_v0_3_3") + + @pytest.fixture(scope="session") def load_student_data(): yield load_csv_data_once("student") diff --git a/tests/data/ldbc_v0_3_3/config.yaml b/tests/data/ldbc_v0_3_3/config.yaml new file mode 100644 index 00000000000..200afa3d2c7 --- /dev/null +++ b/tests/data/ldbc_v0_3_3/config.yaml @@ -0,0 +1,41 @@ +space: + name: ldbc_v0_3_3 + partitionNum: 8 + replicaFactor: 1 + vidType: FIXED_STRING(32) + charset: utf8 + collate: utf8_bin +schema: | + CREATE TAG IF NOT EXISTS `Place`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `City`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Country`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Continent`(`name` string,`url` string,`type` string); + CREATE TAG IF NOT EXISTS `Message`(`creationDate` string,`locationIP` string,`browserUsed` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Comment`(`creationDate` string,`locationIP` string,`browserUsed` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Post`(`imageFile` string,`creationDate` string,`locationIP` string,`browserUsed` string,`language` string,`content` string,`length` int); + CREATE TAG IF NOT EXISTS `Organisation`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `University`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Company`(`type` string,`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Person`(`firstName` string,`lastName` string,`gender` string,`birthday` string,`creationDate` string,`locationIP` string,`browserUsed` string, `email` string, `speaks` string); + CREATE TAG IF NOT EXISTS `TagClass`(`name` string,`url` string); + CREATE TAG IF NOT EXISTS `Forum`(`title` string,`creationDate` string); + CREATE TAG IF NOT EXISTS `Tag`(`name` string,`url` string); + CREATE EDGE IF NOT EXISTS `IS_PART_OF`(); + 
CREATE EDGE IF NOT EXISTS `LIKES`(`creationDate` string); + CREATE EDGE IF NOT EXISTS `HAS_CREATOR`(); + CREATE EDGE IF NOT EXISTS `HAS_INTEREST`(); + CREATE EDGE IF NOT EXISTS `IS_SUBCLASS_OF`(); + CREATE EDGE IF NOT EXISTS `IS_LOCATED_IN`(); + CREATE EDGE IF NOT EXISTS `HAS_MODERATOR`(); + CREATE EDGE IF NOT EXISTS `HAS_TAG`(); + CREATE EDGE IF NOT EXISTS `WORK_AT`(`workFrom` int); + CREATE EDGE IF NOT EXISTS `REPLY_OF`(); + CREATE EDGE IF NOT EXISTS `STUDY_AT`(`classYear` int); + CREATE EDGE IF NOT EXISTS `CONTAINER_OF`(); + CREATE EDGE IF NOT EXISTS `HAS_MEMBER`(`joinDate` string); + CREATE EDGE IF NOT EXISTS `KNOWS`(`creationDate` string); + CREATE EDGE IF NOT EXISTS `HAS_TYPE`(); + CREATE TAG INDEX message_create_date ON `Message`(`creationDate`(20)); + CREATE TAG INDEX person_first_name ON `Person`(`firstName`(20)); +files: + diff --git a/tests/tck/conftest.py b/tests/tck/conftest.py index 13b2ee1fbe3..f2abc3e57a1 100644 --- a/tests/tck/conftest.py +++ b/tests/tck/conftest.py @@ -175,6 +175,7 @@ def preload_space( load_nba_data, load_nba_int_vid_data, load_student_data, + load_ldbc_v0_3_3, session, graph_spaces, ): @@ -185,6 +186,8 @@ def preload_space( graph_spaces["space_desc"] = load_nba_int_vid_data elif space == "student": graph_spaces["space_desc"] = load_student_data + elif space == "ldbc_v0_3_3": + graph_spaces["ldbc_v0_3_3"] = load_ldbc_v0_3_3 else: raise ValueError(f"Invalid space name given: {space}") resp_ok(session, f'USE {space};', True) diff --git a/tests/tck/ldbc/business_intelligence_workload/Read.feature b/tests/tck/ldbc/business_intelligence_workload/Read.feature new file mode 100644 index 00000000000..c913038b89d --- /dev/null +++ b/tests/tck/ldbc/business_intelligence_workload/Read.feature @@ -0,0 +1,558 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. 
+Feature: LDBC Business Intelligence Workload - Read + + Background: + Given a graph with space named "ldbc_v0_3_3" + + Scenario: 1. Posting summary + When executing query: + """ + MATCH (message:Message) + WHERE message.Message.creationDate < "20110721220000000" + WITH count(message) AS totalMessageCountInt + WITH toFloat(totalMessageCountInt) AS totalMessageCount + MATCH (message:Message) + WHERE message.Message.creationDate < "20110721220000000" + AND message.Message.content IS NOT NULL + WITH + totalMessageCount, + message, + toInteger(message.Message.creationDate)/10000000000000 AS year + WITH + totalMessageCount, + year, + "Comment" IN tags(message) AS isComment, + CASE + WHEN message.Message.length < 40 THEN 0 + WHEN message.Message.length < 80 THEN 1 + WHEN message.Message.length < 160 THEN 2 + ELSE 3 + END AS lengthCategory, + count(message) AS messageCount, + floor(avg(message.Message.length)) AS averageMessageLength, + sum(message.Message.length) AS sumMessageLength + RETURN + year, + isComment, + lengthCategory, + messageCount, + averageMessageLength, + sumMessageLength, + messageCount / totalMessageCount AS percentageOfMessages + ORDER BY + year DESC, + isComment ASC, + lengthCategory ASC + """ + Then the result should be, in order: + | year | isComment | lengthCategory | messageCount | averageMessageLength | sumMessageLength | percentageOfMessages | + + Scenario: 2. 
Top tags for country, age, gender, time + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person:Person) + <-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(`tag`:`Tag`) + WHERE message.Message.creationDate >= "20091231230000000" + AND message.Message.creationDate <= "20101107230000000" + AND (id(country) == "Ethiopia" OR id(country) == "Belarus") + WITH + country.Country.name AS countryName, + toInteger(message.Message.creationDate)/100000000000%100 AS month, + person.Person.gender AS gender, + floor((20130101 - person.Person.birthday) / 10000 / 5.0) AS ageGroup, + `tag`.`Tag`.name AS tagName, + message + WITH + countryName, month, gender, ageGroup, tagName, count(message) AS messageCount + WHERE messageCount > 100 + RETURN + countryName, + month, + gender, + ageGroup, + tagName, + messageCount + ORDER BY + messageCount DESC, + tagName ASC, + ageGroup ASC, + gender ASC, + month ASC, + countryName ASC + LIMIT 100 + """ + Then the result should be, in order: + | countryName | month | gender | ageGroup | tagName | messageCount | + + @skip + Scenario: 3. 
Tag evolution + # TODO: Need an index on tag `Tag`, and fix the expr rewrite bug on toInteger(message1.creationDate)/100000000000%100 + When executing query: + """ + WITH + 2010 AS year1, + 10 AS month1, + 2010 + toInteger(10 / 12.0) AS year2, + 10 % 12 + 1 AS month2 + MATCH (`tag`:`Tag`) + OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(`tag`) + WHERE toInteger(message1.creationDate)/10000000000000 == year1 + AND toInteger(message1.creationDate)/100000000000%100 == month1 + WITH year2, month2, `tag`, count(message1) AS countMonth1 + OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(`tag`) + WHERE toInteger(message2.creationDate)/10000000000000 == year2 + AND toInteger(message2.creationDate)/100000000000%100 == month2 + WITH + `tag`, + countMonth1, + count(message2) AS countMonth2 + RETURN + `tag`.name, + countMonth1, + countMonth2, + abs(countMonth1-countMonth2) AS diff + ORDER BY + diff DESC, + `tag`.name ASC + LIMIT 100 + """ + Then the result should be, in any order: + + Scenario: 4. Popular topics in a country + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]-> + (post:Post)-[:HAS_TAG]->(:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "MusicalArtist"}) + WHERE id(country) == "Burma" + RETURN + forum.Forum.id AS forumId, + forum.Forum.title AS forumTitle, + forum.Forum.creationDate AS forumCreationDate, + person.Person.id AS personId, + count(DISTINCT post) AS postCount + ORDER BY + postCount DESC, + forumId ASC + LIMIT 20 + """ + Then the result should be, in order: + | forumId | forumTitle | forumCreationDate | personId | postCount | + + Scenario: 5. 
Top posters in a country + When executing query: + """ + MATCH + (country:Country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_MEMBER]-(forum:Forum) + WHERE id(country) == "Belarus" + WITH forum, count(person) AS numberOfMembers, forum.Forum.id AS forumId + ORDER BY numberOfMembers DESC, forumId ASC + LIMIT 100 + WITH collect(forum) AS popularForums + UNWIND popularForums AS forum + MATCH + (forum)-[:HAS_MEMBER]->(person:Person) + OPTIONAL MATCH + (person)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(popularForum:Forum) + WHERE popularForum IN popularForums + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + person.Person.creationDate AS personCreationDate, + count(DISTINCT post) AS postCount + ORDER BY + postCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | personFirstName | personLastName | personCreationDate | postCount | + + Scenario: 6. Most active posters of a given topic + When executing query: + """ + MATCH (`tag`:`Tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + WHERE id(`tag`) == "Abbas_I_of_Persia" + OPTIONAL MATCH (:Person)-[like:LIKES]->(message) + OPTIONAL MATCH (message)<-[:REPLY_OF]-(comment:`Comment`) + WITH person, count(DISTINCT like) AS likeCount, count(DISTINCT comment) AS replyCount, count(DISTINCT message) AS messageCount + RETURN + person.Person.id AS personId, + replyCount, + likeCount, + messageCount, + 1*messageCount + 2*replyCount + 10*likeCount AS score + ORDER BY + score DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | replyCount | likeCount | messageCount | score | + + Scenario: 7. 
Most authoritative users on a given topic + When executing query: + """ + MATCH (`tag`:`Tag`) + WHERE id(`tag`) == "Arnold_Schwarzenegger" + MATCH (`tag`)<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person) + MATCH (`tag`)<-[:HAS_TAG]-(message2:Message)-[:HAS_CREATOR]->(person1) + OPTIONAL MATCH (message2)<-[:LIKES]-(person2:Person) + OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message3:Message)<-[like:LIKES]-(p3:Person) + RETURN + person1.Person.id AS person1Id, + count(DISTINCT like) AS authorityScore + ORDER BY + authorityScore DESC, + person1Id ASC + LIMIT 100 + """ + Then the result should be, in order: + | person1Id | authorityScore | + + Scenario: 8. Related topics + # NOTICE: The original query has been rewritten + # TODO: WHERE NOT (comment)-[:HAS_TAG]->(tag) + When executing query: + """ + MATCH + (`tag`:`Tag`)<-[:HAS_TAG]-(message:Message), + (message)<-[:REPLY_OF]-(comment:`Comment`)-[:HAS_TAG]->(relatedTag:`Tag`) + WHERE id(`tag`) == "Genghis_Khan" AND NOT `tag` == relatedTag + RETURN + relatedTag.`Tag`.name AS relatedTagName, + count(DISTINCT comment) AS count + ORDER BY + count DESC, + relatedTagName ASC + LIMIT 100 + """ + Then the result should be, in order: + | relatedTagName | count | + + Scenario: 9. 
Forum with related tags + When executing query: + """ + MATCH + (forum:Forum)-[:HAS_MEMBER]->(person:Person) + WITH forum, count(person) AS members + WHERE members > 200 + MATCH + (forum)-[:CONTAINER_OF]->(post1:Post)-[:HAS_TAG]-> + (:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "BaseballPlayer"}) + WITH forum, count(DISTINCT post1) AS count1 + MATCH + (forum)-[:CONTAINER_OF]->(post2:Post)-[:HAS_TAG]-> + (:`Tag`)-[:HAS_TYPE]->(:TagClass {name: "ChristianBishop"}) + WITH forum, count1, count(DISTINCT post2) AS count2 + RETURN + forum.Forum.id AS forumId, + count1, + count2, + abs(count2-count1) AS diff + ORDER BY + diff DESC, + forumId ASC + LIMIT 100 + """ + Then the result should be, in order: + | forumId | count1 | count2 | diff | + + @skip + Scenario: 10. Central person for a tag + # TODO: 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(friend) | interest]) + When executing query: + """ + MATCH (`tag`:`Tag`) + WHERE id(`tag`) == "John_Rhys-Davies" + OPTIONAL MATCH (`tag`)<-[interest:HAS_INTEREST]-(person:Person) + WITH `tag`, collect(person) AS interestedPersons + OPTIONAL MATCH (`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person) + WHERE message.creationDate > "20120122000000000" + WITH `tag`, interestedPersons + collect(person) AS persons + UNWIND persons AS person + WITH DISTINCT `tag`, person + WITH + `tag`, + person, + 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(person) | interest]) + + length([(`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person) WHERE message.creationDate > $date | message]) + AS score + OPTIONAL MATCH (person)-[:KNOWS]-(friend) + WITH + person, + score, + 100 * length([(`tag`)<-[interest:HAS_INTEREST]-(friend) | interest]) + + length([(`tag`)<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(friend) WHERE message.creationDate > $date | message]) + AS friendScore + RETURN + person.id, + score, + sum(friendScore) AS friendsScore + ORDER BY + score + friendsScore DESC, + person.id ASC + LIMIT 100 + """ + Then a 
SyntaxError should be raised at runtime: + + @skip + Scenario: 11. Unrelated replies + # TODO: WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) + When executing query: + """ + WITH ['also', 'Pope', 'that', 'James', 'Henry', 'one', 'Green'] AS blacklist + MATCH + (country:Country {name: "Germany"})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]- + (person:Person)<-[:HAS_CREATOR]-(reply:Comment)-[:REPLY_OF]->(message:Message), + (reply)-[:HAS_TAG]->(tag:Tag) + WHERE NOT (message)-[:HAS_TAG]->(:Tag)<-[:HAS_TAG]-(reply) + AND size([word IN blacklist WHERE reply.content CONTAINS word | word]) = 0 + OPTIONAL MATCH + (:Person)-[like:LIKES]->(reply) + RETURN + person.id, + tag.name, + count(DISTINCT like) AS countLikes, + count(DISTINCT reply) AS countReplies + ORDER BY + countLikes DESC, + person.id ASC, + tag.name ASC + LIMIT 100 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 12. Trending posts + When executing query: + """ + MATCH + (message:Message)-[:HAS_CREATOR]->(creator:Person), + (message)<-[like:LIKES]-(:Person) + WHERE message.Message.creationDate > "20110721220000000" + WITH message, creator, count(like) AS likeCount + WHERE likeCount > 400 + RETURN + message.Message.id AS messageId, + message.Message.creationDate AS messageCreationDate, + creator.Person.firstName AS creatorFirstName, + creator.Person.lastName AS creatorLastName, + likeCount + ORDER BY + likeCount DESC, + messageId ASC + LIMIT 100 + """ + Then the result should be, in order: + | messageId | messageCreationDate | creatorFirstName | creatorLastName | likeCount | + + Scenario: 13. 
Popular tags per month in a country + When executing query: + """ + MATCH (country:Country)<-[:IS_LOCATED_IN]-(message:Message) + WHERE id(country) == "Burma" + OPTIONAL MATCH (message)-[:HAS_TAG]->(`tag`:`Tag`) + WITH + toInteger(message.Message.creationDate)/10000000000000 AS year, + toInteger(message.Message.creationDate)/100000000000%100 AS month, + message, + `tag` + WITH year, month, count(message) AS popularity, `tag`, `tag`.`Tag`.name AS tagName + ORDER BY popularity DESC, tagName ASC + WITH + year, + month, + collect([`tag`.`Tag`.name, popularity]) AS popularTags + WITH + year, + month, + [popularTag IN popularTags WHERE popularTag[0] IS NOT NULL] AS popularTags + RETURN + year, + month, + popularTags[0..5] AS topPopularTags + ORDER BY + year DESC, + month ASC + LIMIT 100 + """ + Then the result should be, in order: + | year | month | topPopularTags | + + Scenario: 14. Top thread initiators + # TODO: [:REPLY_OF*0..] + When executing query: + """ + MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..100]-(reply:Message) + WHERE reply.Message.creationDate >= "20120531220000000" + AND reply.Message.creationDate <= "20120630220000000" + WITH person, post, reply + WHERE post.Post.creationDate >= "20120531220000000" + AND post.Post.creationDate <= "20120630220000000" + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + count(DISTINCT post) AS threadCount, + count(DISTINCT reply) AS messageCount + ORDER BY + messageCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | threadCount | messageCount | + + Scenario: 15. 
Social normals + When executing query: + """ + MATCH + (country:Country) + WHERE id(country) == "Burma" + MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person1:Person) + OPTIONAL MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend1:Person), + (person1)-[:KNOWS]-(friend1) + WITH country, person1, count(friend1) AS friend1Count + WITH country, avg(friend1Count) AS socialNormalFloat + WITH country, floor(socialNormalFloat) AS socialNormal + MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(person2:Person) + OPTIONAL MATCH + (country)<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(friend2:Person)-[:KNOWS]-(person2) + WITH country, person2, count(friend2) AS friend2Count, socialNormal + WHERE friend2Count == socialNormal + RETURN + person2.Person.id AS person2Id, + friend2Count AS count + ORDER BY + person2Id ASC + LIMIT 100 + """ + Then the result should be, in order: + | person2Id | count | + + Scenario: 16. Experts in social circle + When executing query: + """ + MATCH + (n:Person)-[:KNOWS*3..5]-(person:Person) + WHERE id(n) == "19791209310731" + WITH DISTINCT person + MATCH + (person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(:Country {name: "Pakistan"}), + (person)<-[:HAS_CREATOR]-(message:Message)-[:HAS_TAG]->(:`Tag`)-[:HAS_TYPE]-> + (:TagClass {name: "MusicalArtist"}) + MATCH + (message)-[:HAS_TAG]->(`tag`:`Tag`) + RETURN + person.Person.id AS personId, + `tag`.`Tag`.name AS tagName, + count(DISTINCT message) AS messageCount + ORDER BY + messageCount DESC, + tagName ASC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | tagName | messageCount | + + Scenario: 17. 
Friend triangles + When executing query: + """ + MATCH (country:Country) + WHERE id(country) == "Spain" + MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (b:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (c:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country) + MATCH (a)-[:KNOWS]-(b), (b)-[:KNOWS]-(c), (c)-[:KNOWS]-(a) + WHERE a.Person.id < b.Person.id + AND b.Person.id < c.Person.id + RETURN count(*) AS count + """ + Then the result should be, in any order: + | count | + | 0 | + + Scenario: 18. How many persons have a given number of messages + # TODO: [:REPLY_OF*0..] + When executing query: + """ + MATCH (person:Person) + OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..100]->(post:Post) + WHERE message.Message.content IS NOT NULL + AND message.Message.length < 20 + AND message.Message.creationDate > "20110722000000000" + AND post.Post.language IN ["ar"] + WITH + person, + count(message) AS messageCount + RETURN + messageCount, + count(person) AS personCount + ORDER BY + personCount DESC, + messageCount DESC + """ + Then the result should be, in order: + | messageCount | personCount | + + Scenario: 19. 
Stranger’s interaction + # NOTICE: Heavily rewritten from the original query; correctness still needs to be verified + When executing query: + """ + MATCH + (tagClass:TagClass)<-[:HAS_TYPE]-(:`Tag`)<-[:HAS_TAG]- + (forum1:Forum)-[:HAS_MEMBER]->(stranger:Person) + WHERE id(tagClass) == "MusicalArtist" + WITH DISTINCT stranger + MATCH + (tagClass:TagClass)<-[:HAS_TYPE]-(:`Tag`)<-[:HAS_TAG]- + (forum2:Forum)-[:HAS_MEMBER]->(stranger) + WHERE id(tagClass) == "OfficeHolder" + WITH DISTINCT stranger + MATCH + (person:Person)<-[:HAS_CREATOR]-(comment:`Comment`)-[:REPLY_OF*100]->(message:Message)-[:HAS_CREATOR]->(stranger) + OPTIONAL MATCH (person)-[knows:KNOWS]-(stranger) + OPTIONAL MATCH (message)-[replyOf:REPLY_OF*100]->(:Message)-[hasCreator:HAS_CREATOR]->(stranger) + WHERE person.Person.birthday > "19890101" + AND person <> stranger + AND knows IS NULL + AND (replyOf IS NULL OR hasCreator IS NULL) + RETURN + person.Person.id AS personId, + count(DISTINCT stranger) AS strangersCount, + count(comment) AS interactionCount + ORDER BY + interactionCount DESC, + personId ASC + LIMIT 100 + """ + Then the result should be, in order: + | personId | strangersCount | interactionCount | + + Scenario: 20. High-level topics + When executing query: + """ + MATCH + (tagClass:TagClass)<-[:IS_SUBCLASS_OF*0..100]- + (:TagClass)<-[:HAS_TYPE]-(`tag`:`Tag`)<-[:HAS_TAG]-(message:Message) + WHERE id(tagClass) IN ['Writer', 'Single', 'Country'] + RETURN + tagClass.TagClass.name AS tagClassName, + count(DISTINCT message) AS messageCount + ORDER BY + messageCount DESC, + tagClassName ASC + LIMIT 100 + """ + Then the result should be, in order: + | tagClassName | messageCount | diff --git a/tests/tck/ldbc/interactive_workload/ComplexReads.feature b/tests/tck/ldbc/interactive_workload/ComplexReads.feature new file mode 100644 index 00000000000..52acc82bda7 --- /dev/null +++ b/tests/tck/ldbc/interactive_workload/ComplexReads.feature @@ -0,0 +1,349 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. 
+# +# This source code is licensed under Apache 2.0 License. +Feature: LDBC Interactive Workload - Complex Reads + + Background: + Given a graph with space named "ldbc_v0_3_3" + + @skip + Scenario: 1. Friends with certain name + # TODO: shortestPath syntax is not supported for now + When executing query: + """ + MATCH p=shortestPath((person:Person)-[path:KNOWS*1..3]-(friend:Person {firstName: "$firstName"})) + WHERE id(person) == "" + WHERE person <> friend + WITH friend, length(p) AS distance + ORDER BY distance ASC, friend.lastName ASC, toInteger(friend.id) ASC + LIMIT 20 + MATCH (friend)-[:IS_LOCATED_IN]->(friendCity:Place) + OPTIONAL MATCH (friend)-[studyAt:STUDY_AT]->(uni:Organisation)-[:IS_LOCATED_IN]->(uniCity:Place) + WITH + friend, + collect( + CASE uni.name + WHEN null THEN null + ELSE [uni.name, studyAt.classYear, uniCity.name] + END + ) AS unis, + friendCity, + distance + OPTIONAL MATCH (friend)-[workAt:WORK_AT]->(company:Organisation)-[:IS_LOCATED_IN]->(companyCountry:Place) + WITH + friend, + collect( + CASE company.name + WHEN null THEN null + ELSE [company.name, workAt.workFrom, companyCountry.name] + END + ) AS companies, + unis, + friendCity, + distance + RETURN + friend.id AS friendId, + friend.lastName AS friendLastName, + distance AS distanceFromPerson, + friend.birthday AS friendBirthday, + friend.creationDate AS friendCreationDate, + friend.gender AS friendGender, + friend.browserUsed AS friendBrowserUsed, + friend.locationIP AS friendLocationIp, + friend.email AS friendEmails, + friend.speaks AS friendLanguages, + friendCity.name AS friendCityName, + unis AS friendUniversities, + companies AS friendCompanies + ORDER BY distanceFromPerson ASC, friendLastName ASC, toInteger(friendId) ASC + LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: syntax error near `shortestPath' + + Scenario: 2. 
Recent messages by your friends + When executing query: + """ + MATCH (n:Person)-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(message:Message) + WHERE id(n) == "" and message.Message.creationDate <= $maxDate + RETURN + friend.Person.id AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + toInteger(message.Message.id) AS messageId, + CASE exists(message.Message.content) + WHEN true THEN message.Message.content + ELSE message.Message.imageFile + END AS messageContent, + message.Message.creationDate AS messageCreationDate + ORDER BY messageCreationDate DESC, messageId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | messageId | messageContent | messageCreationDate | + + @skip + Scenario: 3. Friends and friends of friends that have been to given countries + When executing query: + # TODO: WHERE not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryX)) not supported now + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person)<-[:HAS_CREATOR]-(messageX:Message), + (messageX)-[:IS_LOCATED_IN]->(countryX:Place) + WHERE + id(person) == "" + AND not(person==friend) + AND not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryX)) + AND countryX.name=$countryXName AND messageX.creationDate>=$startDate + AND messageX.creationDate<$endDate + WITH friend, count(DISTINCT messageX) AS xCount + MATCH (friend)<-[:HAS_CREATOR]-(messageY:Message)-[:IS_LOCATED_IN]->(countryY:Place) + WHERE + countryY.name="$countryYName" + AND not((friend)-[:IS_LOCATED_IN]->()-[:IS_PART_OF]->(countryY)) + AND messageY.creationDate>="$startDate" + AND messageY.creationDate<"$endDate" + WITH + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + xCount, + count(DISTINCT messageY) AS yCount + RETURN + personId, + personFirstName, + personLastName, + xCount, + yCount, + xCount + yCount AS count + ORDER BY count DESC, toInteger(personId) ASC 
+ LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 4. New topics + When executing query: + """ + MATCH (person:Person)-[:KNOWS]-(:Person)<-[:HAS_CREATOR]-(post:Post)-[:HAS_TAG]->(`tag`:`Tag`) + WHERE id(person) == "" AND post.Post.creationDate >= "$startDate" + AND post.Post.creationDate < "$endDate" + WITH person, count(post) AS postsOnTag, `tag` + OPTIONAL MATCH (person)-[:KNOWS]-()<-[:HAS_CREATOR]-(oldPost:Post)-[:HAS_TAG]->(`tag`) + WHERE oldPost.Post.creationDate < $startDate + WITH person, postsOnTag, `tag`, count(oldPost) AS cp + WHERE cp == 0 + RETURN + `tag`.`Tag`.name AS tagName, + sum(postsOnTag) AS postCount + ORDER BY postCount DESC, tagName ASC + """ + Then the result should be, in any order: + | tagName | postCount | + + Scenario: 5. New groups + When executing query: + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person)<-[membership:HAS_MEMBER]-(forum:Forum) + WHERE id(person) == "" AND membership.joinDate>"$minDate" + AND not(person==friend) + WITH DISTINCT friend, forum + OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post)<-[:CONTAINER_OF]-(forum) + WITH forum, count(post) AS postCount + RETURN + toInteger(forum.Forum.id) AS forumId, + forum.Forum.title AS forumTitle, + postCount + ORDER BY postCount DESC, forumId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | forumId | forumTitle | postCount | + + @skip + Scenario: 6. 
Tag co-occurrence + # TODO: WHERE (commonPost)-[:HAS_CREATOR]->(friend) + When executing query: + """ + MATCH + (person:Person)-[:KNOWS*1..2]-(friend:Person), + (friend)<-[:HAS_CREATOR]-(friendPost:Post)-[:HAS_TAG]->(knownTag:`Tag` {name:"$tagName"}) + WHERE id(person) == "" AND not(person==friend) + MATCH (friendPost)-[:HAS_TAG]->(commonTag:`Tag`) + WHERE not(commonTag==knownTag) + WITH DISTINCT commonTag, knownTag, friend + MATCH (commonTag)<-[:HAS_TAG]-(commonPost:Post)-[:HAS_TAG]->(knownTag) + WHERE (commonPost)-[:HAS_CREATOR]->(friend) + RETURN + commonTag.name AS tagName, + count(commonPost) AS postCount + ORDER BY postCount DESC, tagName ASC + LIMIT 10 + """ + Then a SyntaxError should be raised at runtime: + + @skip + Scenario: 7. Recent likers + # TODO: RETURN not((liker)-[:KNOWS]-(person)) AS isNew + When executing query: + """ + MATCH (person:Person)<-[:HAS_CREATOR]-(message:Message)<-[like:LIKES]-(liker:Person) + WHERE id(person) == "" + WITH liker, message, like.creationDate AS likeTime, person + ORDER BY likeTime DESC, toInteger(message.id) ASC + WITH + liker, + head(collect({msg: message, likeTime: likeTime})) AS latestLike, + person + RETURN + toInteger(liker.id) AS personId, + liker.firstName AS personFirstName, + liker.lastName AS personLastName, + latestLike.likeTime AS likeCreationDate, + latestLike.msg.id AS messageId, + CASE exists(latestLike.msg.content) + WHEN true THEN latestLike.msg.content + ELSE latestLike.msg.imageFile + END AS messageContent, + latestLike.msg.creationDate AS messageCreationDate, + not((liker)-[:KNOWS]-(person)) AS isNew + ORDER BY likeCreationDate DESC, personId ASC + LIMIT 20 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 8. 
Recent replies + When executing query: + """ + MATCH + (start:Person)<-[:HAS_CREATOR]-(:Message)<-[:REPLY_OF]-(comment:`Comment`)-[:HAS_CREATOR]->(person:Person) + WHERE id(start) == "" + RETURN + person.Person.id AS personId, + person.Person.firstName AS personFirstName, + person.Person.lastName AS personLastName, + comment.`Comment`.creationDate AS commentCreationDate, + toInteger(comment.`Comment`.id) AS commentId, + comment.`Comment`.content AS commentContent + ORDER BY commentCreationDate DESC, commentId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | commentCreationDate | commentId | commentContent | + + Scenario: 9. Recent messages by friends or friends of friends + When executing query: + """ + MATCH (n:Person)-[:KNOWS*1..2]-(friend:Person)<-[:HAS_CREATOR]-(message:Message) + WHERE id(n) == "" AND message.Message.creationDate < "$maxDate" + RETURN DISTINCT + friend.Person.id AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + toInteger(message.Message.id) AS messageId, + CASE exists(message.Message.content) + WHEN true THEN message.Message.content + ELSE message.Message.imageFile + END AS messageContent, + message.Message.creationDate AS messageCreationDate + ORDER BY messageCreationDate DESC, messageId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | messageId | messageContent | messageCreationDate | + + @skip + Scenario: 10. 
Friend recommendation + # TODO: WHERE patterns, WITH patterns + When executing query: + """ + MATCH (person:Person)-[:KNOWS*2..2]-(friend:Person)-[:IS_LOCATED_IN]->(city:Place) + WHERE id(person) == "" AND + ((friend.birthday/100%100 = "$month" AND friend.birthday%100 >= 21) OR + (friend.birthday/100%100 = "$nextMonth" AND friend.birthday%100 < 22)) + AND not(friend=person) + AND not((friend)-[:KNOWS]-(person)) + WITH DISTINCT friend, city, person + OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post) + WITH friend, city, collect(post) AS posts, person + WITH + friend, + city, + length(posts) AS postCount, + length([p IN posts WHERE (p)-[:HAS_TAG]->(:`Tag`)<-[:HAS_INTEREST]-(person)]) AS commonPostCount + RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + commonPostCount - (postCount - commonPostCount) AS commonInterestScore, + friend.gender AS personGender, + city.name AS personCityName + ORDER BY commonInterestScore DESC, toInteger(personId) ASC + LIMIT 10 + """ + Then a SyntaxError should be raised at runtime: + + Scenario: 11. Job referral + When executing query: + """ + MATCH (person:Person)-[:KNOWS*1..2]-(friend:Person) + WHERE id(person) == "" AND not(person==friend) + WITH DISTINCT friend + MATCH (friend)-[workAt:WORK_AT]->(company:Organisation)-[:IS_LOCATED_IN]->(:Place {name:"$countryName"}) + WHERE workAt.workFrom < $workFromYear + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + company.Organisation.name AS organizationName, + workAt.workFrom AS organizationWorkFromYear + ORDER BY organizationWorkFromYear ASC, personId ASC, organizationName DESC + LIMIT 10 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | organizationName | organizationWorkFromYear | + + Scenario: 12. Expert search + # TODO: [:IS_SUBCLASS_OF*0..] 
+ When executing query: + """ + MATCH (n:Person)-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(`comment`:`Comment`)-[:REPLY_OF]->(:Post)-[:HAS_TAG]->(`tag`:`Tag`), + (`tag`)-[:HAS_TYPE]->(tagClass:TagClass)-[:IS_SUBCLASS_OF*0..100]->(baseTagClass:TagClass) + WHERE id(n)=="" AND (tagClass.TagClass.name == "$tagClassName" OR baseTagClass.TagClass.name == "$tagClassName") + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS personFirstName, + friend.Person.lastName AS personLastName, + collect(DISTINCT `tag`.`Tag`.name) AS tagNames, + count(DISTINCT `comment`) AS replyCount + ORDER BY replyCount DESC, personId ASC + LIMIT 20 + """ + Then the result should be, in any order: + | personId | personFirstName | personLastName | tagNames | replyCount | + + @skip + Scenario: 13. Single shortest path + # TODO: shortestPath + When executing query: + """ + MATCH (person1:Person {id:$person1Id}), (person2:Person {id:$person2Id}) + OPTIONAL MATCH path = shortestPath((person1)-[:KNOWS*]-(person2)) + RETURN + CASE path IS NULL + WHEN true THEN -1 + ELSE length(path) + END AS shortestPathLength; + """ + Then a SyntaxError should be raised at runtime: + + @skip + Scenario: 14. 
Trusted connection paths + # TODO: allShortestPaths + When executing query: + """ + MATCH path = allShortestPaths((person1:Person {id:$person1Id})-[:KNOWS*..15]-(person2:Person {id:$person2Id})) + WITH nodes(path) AS pathNodes + RETURN + extract(n IN pathNodes | n.id) AS personIdsInPath, + reduce(weight=0.0, idx IN range(1,size(pathNodes)-1) | extract(prev IN [pathNodes[idx-1]] | extract(curr IN [pathNodes[idx]] | weight + length((curr)<-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(prev))*1.0 + length((prev)<-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]->(:Post)-[:HAS_CREATOR]->(curr))*1.0 + length((prev)-[:HAS_CREATOR]-(:Comment)-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]-(curr))*0.5) )[0][0]) AS pathWight + ORDER BY pathWight DESC + """ + Then a SyntaxError should be raised at runtime: diff --git a/tests/tck/ldbc/interactive_workload/ShortReads.feature b/tests/tck/ldbc/interactive_workload/ShortReads.feature new file mode 100644 index 00000000000..f7c40564202 --- /dev/null +++ b/tests/tck/ldbc/interactive_workload/ShortReads.feature @@ -0,0 +1,131 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: LDBC Interactive Workload - Short Reads + + Background: + Given a graph with space named "ldbc_v0_3_3" + + Scenario: 1. Profile of a person + When executing query: + """ + MATCH (n:Person)-[:IS_LOCATED_IN]->(p:Place) + WHERE id(n)=="" + RETURN + n.Person.firstName AS firstName, + n.Person.lastName AS lastName, + n.Person.birthday AS birthday, + n.Person.locationIP AS locationIP, + n.Person.browserUsed AS browserUsed, + p.Place.id AS cityId, + n.Person.gender AS gender, + n.Person.creationDate AS creationDate + """ + Then the result should be, in any order: + | firstName | lastName | birthday | locationIP | browserUsed | cityId | gender | creationDate | + + Scenario: 2. Recent messages of a person + # TODO: [:REPLY_OF*0..] 
is not supported; [:REPLY_OF*0..100] is used instead for now + When executing query: + """ + MATCH (n:Person)<-[:HAS_CREATOR]-(m:Message)-[:REPLY_OF*0..100]->(p:Post) + WHERE id(n)=="" + MATCH (p)-[:HAS_CREATOR]->(c) + RETURN + m.Message.id as messageId, + CASE exists(m.Message.content) + WHEN true THEN m.Message.content + ELSE m.Message.imageFile + END AS messageContent, + m.Message.creationDate AS messageCreationDate, + p.Post.id AS originalPostId, + c.Person.id AS originalPostAuthorId, + c.Person.firstName as originalPostAuthorFirstName, + c.Person.lastName as originalPostAuthorLastName + ORDER BY messageCreationDate DESC + LIMIT 10 + """ + Then the result should be, in any order: + | messageId | messageContent | messageCreationDate | originalPostId | originalPostAuthorId | originalPostAuthorFirstName | originalPostAuthorLastName | + + Scenario: 3. Friends of a person + When executing query: + """ + MATCH (n:Person)-[r:KNOWS]-(friend) + WHERE id(n) == "" + RETURN + toInteger(friend.Person.id) AS personId, + friend.Person.firstName AS firstName, + friend.Person.lastName AS lastName, + r.creationDate AS friendshipCreationDate + ORDER BY friendshipCreationDate DESC, personId ASC + """ + Then the result should be, in any order: + | personId | firstName | lastName | friendshipCreationDate | + + Scenario: 4. Content of a message + When executing query: + """ + MATCH (m:Message) + WHERE id(m) == "" + RETURN + m.Message.creationDate as messageCreationDate, + CASE exists(m.Message.content) + WHEN true THEN m.Message.content + ELSE m.Message.imageFile + END AS messageContent + """ + Then the result should be, in any order: + | messageCreationDate | messageContent | + + Scenario: 5. 
Given a Message, retrieve its author + When executing query: + """ + MATCH (m:Message)-[:HAS_CREATOR]->(p:Person) + WHERE id(m) == "" + RETURN + p.Person.id AS personId, + p.Person.firstName AS firstName, + p.Person.lastName AS lastName + """ + Then the result should be, in any order: + | personId | firstName | lastName | + + Scenario: 6. Forum of a message + # TODO: [:REPLY_OF*0..] is not supported; [:REPLY_OF*0..100] is used instead for now + When executing query: + """ + MATCH (m:Message)-[:REPLY_OF*0..100]->(p:Post)<-[:CONTAINER_OF]-(f:Forum)-[:HAS_MODERATOR]->(mod:Person) + WHERE id(m) == "" + RETURN + f.Forum.id AS forumId, + f.Forum.title AS forumTitle, + mod.Person.id AS moderatorId, + mod.Person.firstName AS moderatorFirstName, + mod.Person.lastName AS moderatorLastName + """ + Then the result should be, in any order: + | forumId | forumTitle | moderatorId | moderatorFirstName | moderatorLastName | + + Scenario: 7. Replies of a message + # Notice: Comment is a keyword, so it is escaped as `Comment` + When executing query: + """ + MATCH (m:Message)<-[:REPLY_OF]-(c:`Comment`)-[:HAS_CREATOR]->(p:Person) + WHERE id(m) == "" + OPTIONAL MATCH (m)-[:HAS_CREATOR]->(a:Person)-[r:KNOWS]-(p) + RETURN + c.`Comment`.id AS commentId, + c.`Comment`.content AS commentContent, + c.`Comment`.creationDate AS commentCreationDate, + p.Person.id AS replyAuthorId, + p.Person.firstName AS replyAuthorFirstName, + p.Person.lastName AS replyAuthorLastName, + CASE r + WHEN null THEN false + ELSE true + END AS replyAuthorKnowsOriginalMessageAuthor + ORDER BY commentCreationDate DESC, replyAuthorId + """ + Then the result should be, in any order: + | commentId | commentContent | commentCreationDate | replyAuthorId | replyAuthorFirstName | replyAuthorLastName | replyAuthorKnowsOriginalMessageAuthor | diff --git a/tests/tck/steps/test_ldbc.py b/tests/tck/steps/test_ldbc.py new file mode 100644 index 00000000000..3a3a3c0e407 --- /dev/null +++ b/tests/tck/steps/test_ldbc.py @@ -0,0 +1,7 @@ +# Copyright 
(c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. + +from pytest_bdd import scenarios + +scenarios('ldbc')