diff --git a/watchman/query/QueryExpr.h b/watchman/query/QueryExpr.h index c21b4bcf69b6..19a8553dc152 100644 --- a/watchman/query/QueryExpr.h +++ b/watchman/query/QueryExpr.h @@ -77,6 +77,15 @@ class QueryExpr { */ virtual std::optional> computeGlobUpperBound( CaseSensitivity) const = 0; + + enum ReturnOnlyFiles { No, Yes, Unrelated }; + + /** + * Returns whether this expression only returns files. + * Used to determine if Eden can use the faster server-based + * method to handle this query. + */ + virtual ReturnOnlyFiles listOnlyFiles() const = 0; }; } // namespace watchman diff --git a/watchman/query/base.cpp b/watchman/query/base.cpp index b684dd7a5d20..635ee9e2d6d7 100644 --- a/watchman/query/base.cpp +++ b/watchman/query/base.cpp @@ -50,6 +50,20 @@ class NotExpr : public QueryExpr { // the inner expression is. return std::nullopt; } + + /** + * Inverts the result of the subexpression. + * Expressions that are unrelated stay unrelated. + */ + ReturnOnlyFiles listOnlyFiles() const override { + auto subExprValue = expr->listOnlyFiles(); + if (subExprValue == ReturnOnlyFiles::Yes) { + return ReturnOnlyFiles::No; + } else if (subExprValue == ReturnOnlyFiles::No) { + return ReturnOnlyFiles::Yes; + } + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(not, NotExpr::parse); @@ -69,6 +83,10 @@ class TrueExpr : public QueryExpr { // We will match every path --> unbounded. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(true, TrueExpr::parse); @@ -88,6 +106,10 @@ class FalseExpr : public QueryExpr { // We will not match any path --> bounded by an empty list of globs. return std::vector{}; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(false, FalseExpr::parse); @@ -237,6 +259,59 @@ class ListExpr : public QueryExpr { return std::vector( unionOfUpperBounds.begin(), unionOfUpperBounds.end()); } + /** + * Combines the results of the subexpressions. + * For allof, the result needs to satisfy each subexpression. + * - If any subexpression requests non-file data, the whole expression must + * also contain non-file data. Therefore we return No if any subexpression + * requests non-file data. + * - If any subexpression requests only file data, the whole expression must + * also contain only file data. Therefore we return Yes if one subexpression + * requests only file data. and there is no subexpression requesting + * non-file data. + * - We return No if there are both Yes and No subexpressions because + * we want expressions that exclude groups of types like + * ["not", ["allof", ["type", "l"], ["type", "d"]]] + * to return Yes if we are excluding directories. + * For anyof, the result can satisfy any subexpression. + * - If any subexpression requests non-file data, the whole expression could + * also contain non-file data. Therefore we return No if any subexpression + * requests non-file data. + * - The only way for the whole expression to contain only file data is if + * each subexpression contains only file data. Therefore we return Yes only + * if each subexpression is Yes. + */ + ReturnOnlyFiles listOnlyFiles() const override { + ReturnOnlyFiles result = ReturnOnlyFiles::Unrelated; + if (allof) { + for (auto& subExpr : exprs) { + auto value = subExpr->listOnlyFiles(); + if (value == ReturnOnlyFiles::Yes) { + result = ReturnOnlyFiles::Yes; + } else if (value == ReturnOnlyFiles::No) { + return ReturnOnlyFiles::No; + } + } + } else { + bool allYes = exprs.empty() + ? false + : exprs[0]->listOnlyFiles() == ReturnOnlyFiles::Yes; + for (auto& subExpr : exprs) { + auto value = subExpr->listOnlyFiles(); + if (value == ReturnOnlyFiles::Yes) { + // Keep checking the remaining subexprs + } else if (value == ReturnOnlyFiles::No) { + return ReturnOnlyFiles::No; + } else { + allYes = false; + } + } + if (allYes) { + return ReturnOnlyFiles::Yes; + } + } + return result; + } }; W_TERM_PARSER(anyof, ListExpr::parseAnyOf); diff --git a/watchman/query/dirname.cpp b/watchman/query/dirname.cpp index 4dbcca84cdcc..5c92a1e61830 100644 --- a/watchman/query/dirname.cpp +++ b/watchman/query/dirname.cpp @@ -174,6 +174,10 @@ class DirNameExpr : public QueryExpr { return std::vector{outputPattern.string() + "/**"}; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(dirname, DirNameExpr::parseDirName); diff --git a/watchman/query/empty.cpp b/watchman/query/empty.cpp index 2df136483077..cc5c3833b873 100644 --- a/watchman/query/empty.cpp +++ b/watchman/query/empty.cpp @@ -30,6 +30,10 @@ class ExistsExpr : public QueryExpr { // `exists` doesn't constrain the path. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(exists, ExistsExpr::parse); @@ -71,6 +75,10 @@ class EmptyExpr : public QueryExpr { // `empty` doesn't constrain the path. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(empty, EmptyExpr::parse); diff --git a/watchman/query/intcompare.cpp b/watchman/query/intcompare.cpp index d4083d9d2694..c89da8f78657 100644 --- a/watchman/query/intcompare.cpp +++ b/watchman/query/intcompare.cpp @@ -128,6 +128,10 @@ class SizeExpr : public QueryExpr { // `size` doesn't constrain the path. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(size, SizeExpr::parse); diff --git a/watchman/query/match.cpp b/watchman/query/match.cpp index 8c25e092f743..c3c0297cfec9 100644 --- a/watchman/query/match.cpp +++ b/watchman/query/match.cpp @@ -208,6 +208,10 @@ class WildMatchExpr : public QueryExpr { return std::vector{ trimGlobAfterDoubleStar(outputPattern).string()}; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(match, WildMatchExpr::parseMatch); W_TERM_PARSER(imatch, WildMatchExpr::parseIMatch); diff --git a/watchman/query/name.cpp b/watchman/query/name.cpp index a6de9c1001d7..d1d3802f0f4f 100644 --- a/watchman/query/name.cpp +++ b/watchman/query/name.cpp @@ -182,6 +182,10 @@ class NameExpr : public QueryExpr { return std::vector( globUpperBound.begin(), globUpperBound.end()); } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(name, NameExpr::parseName); diff --git a/watchman/query/pcre.cpp b/watchman/query/pcre.cpp index 6138a2c5d079..5f22e7ca0cd7 100644 --- a/watchman/query/pcre.cpp +++ b/watchman/query/pcre.cpp @@ -153,6 +153,10 @@ class PcreExpr : public QueryExpr { return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(pcre, PcreExpr::parsePcre); W_TERM_PARSER(ipcre, PcreExpr::parseIPcre); diff --git a/watchman/query/since.cpp b/watchman/query/since.cpp index b95a2a72a1c1..e290a10ffa7b 100644 --- a/watchman/query/since.cpp +++ b/watchman/query/since.cpp @@ -166,6 +166,10 @@ class SinceExpr : public QueryExpr { // `since` doesn't constrain the path. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(since, SinceExpr::parse); diff --git a/watchman/query/suffix.cpp b/watchman/query/suffix.cpp index 676e0ebee387..f33230b75fb9 100644 --- a/watchman/query/suffix.cpp +++ b/watchman/query/suffix.cpp @@ -96,6 +96,10 @@ class SuffixExpr : public QueryExpr { // it as unbounded. return std::nullopt; } + + ReturnOnlyFiles listOnlyFiles() const override { + return ReturnOnlyFiles::Unrelated; + } }; W_TERM_PARSER(suffix, SuffixExpr::parse); W_CAP_REG("suffix-set") diff --git a/watchman/query/type.cpp b/watchman/query/type.cpp index 6c6716987414..d1b1b68addd8 100644 --- a/watchman/query/type.cpp +++ b/watchman/query/type.cpp @@ -108,6 +108,17 @@ class TypeExpr : public QueryExpr { // `type` doesn't constrain the path. return std::nullopt; } + + /** + * Determines if this expression will return only files. + * An expression returns files if is not type 'd', for directories. + */ + ReturnOnlyFiles listOnlyFiles() const override { + if (arg == 'd') { + return ReturnOnlyFiles::No; + } + return ReturnOnlyFiles::Yes; + } }; W_TERM_PARSER(type, TypeExpr::parse); diff --git a/watchman/test/ReturnOnlyFilesTest.cpp b/watchman/test/ReturnOnlyFilesTest.cpp new file mode 100644 index 000000000000..1ecaa104d7c6 --- /dev/null +++ b/watchman/test/ReturnOnlyFilesTest.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include "watchman/query/GlobTree.h" +#include "watchman/query/Query.h" +#include "watchman/query/QueryExpr.h" +#include "watchman/query/TermRegistry.h" +#include "watchman/thirdparty/jansson/jansson.h" + +using namespace watchman; +using namespace testing; + +namespace { +std::optional expr_return_only_files( + std::string expression_json) { + json_error_t err{}; + auto expression = json_loads(expression_json.c_str(), JSON_DECODE_ANY, &err); + if (!expression.has_value()) { + ADD_FAILURE() << "JSON parse error in fixture: " << err.text << " at " + << err.source << ":" << err.line << ":" << err.column; + return std::nullopt; + } + Query query; + // Disable automatic parsing of "match" as "imatch", "name" as "iname", etc. + auto expr = watchman::parseQueryExpr(&query, *expression); + return expr->listOnlyFiles(); +} + +} // namespace + +TEST(ReturnOnlyFilesTest, false) { + EXPECT_THAT( + expr_return_only_files(R"( ["false"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, true) { + EXPECT_THAT( + expr_return_only_files(R"( ["true"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, type_d) { + EXPECT_THAT( + expr_return_only_files(R"( ["type", "d"] )"), + Optional(QueryExpr::ReturnOnlyFiles::No)); +} + +TEST(ReturnOnlyFilesTest, type_f) { + EXPECT_THAT( + expr_return_only_files(R"( ["type", "f"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Yes)); +} + +TEST(ReturnOnlyFilesTest, type_l) { + EXPECT_THAT( + expr_return_only_files(R"( ["type", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Yes)); +} + +TEST(ReturnOnlyFilesTest, dirname) { + EXPECT_THAT( + expr_return_only_files(R"( ["dirname", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, idirname) { + EXPECT_THAT( + expr_return_only_files(R"( ["idirname", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, empty) { + EXPECT_THAT( + expr_return_only_files(R"( ["empty"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, exists) { + EXPECT_THAT( + expr_return_only_files(R"( ["exists"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, match) { + EXPECT_THAT( + expr_return_only_files(R"( ["match", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, imatch) { + EXPECT_THAT( + expr_return_only_files(R"( ["imatch", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, name) { + EXPECT_THAT( + expr_return_only_files(R"( ["name", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, iname) { + EXPECT_THAT( + expr_return_only_files(R"( ["iname", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, pcre) { + EXPECT_THAT( + expr_return_only_files(R"( ["pcre", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, ipcre) { + EXPECT_THAT( + expr_return_only_files(R"( ["ipcre", "l"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, since) { + EXPECT_THAT( + expr_return_only_files(R"( ["since", "c:0:0"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, size) { + EXPECT_THAT( + expr_return_only_files(R"( ["size", "eq", 0] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, suffix) { + EXPECT_THAT( + expr_return_only_files(R"( ["suffix", "txt"] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, allof_yes) { + EXPECT_THAT( + expr_return_only_files(R"( ["allof", ["type", "f"], ["exists"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::Yes)); +} + +TEST(ReturnOnlyFilesTest, allof_no) { + EXPECT_THAT( + expr_return_only_files(R"( ["allof", ["type", "d"], ["exists"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::No)); +} + +TEST(ReturnOnlyFilesTest, allof_unrelated) { + EXPECT_THAT( + expr_return_only_files(R"( ["allof", ["false"], ["exists"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} + +TEST(ReturnOnlyFilesTest, allof_yesno) { + EXPECT_THAT( + expr_return_only_files(R"( ["allof", ["type", "d"], ["type", "f"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::No)); +} + +TEST(ReturnOnlyFilesTest, not_allof_yesno) { + EXPECT_THAT( + expr_return_only_files(R"( ["not", + ["allof", ["type", "d"], ["type", "f"]]] )"), + Optional(QueryExpr::ReturnOnlyFiles::Yes)); +} + +TEST(ReturnOnlyFilesTest, anyof_no) { + EXPECT_THAT( + expr_return_only_files(R"( ["allof", ["type", "d"], ["exists"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::No)); +} + +TEST(ReturnOnlyFilesTest, anyof_yes) { + EXPECT_THAT( + expr_return_only_files(R"( ["anyof", ["type", "f"], ["type", "l"]] )"), + Optional(QueryExpr::ReturnOnlyFiles::Yes)); +} + +TEST(ReturnOnlyFilesTest, anyof_unrelated) { + EXPECT_THAT( + expr_return_only_files(R"( ["not", + ["anyof", ["exists"], ["true"]]] )"), + Optional(QueryExpr::ReturnOnlyFiles::Unrelated)); +} diff --git a/watchman/watcher/eden.cpp b/watchman/watcher/eden.cpp index 94ef01a5cd2a..231f0f61d15e 100644 --- a/watchman/watcher/eden.cpp +++ b/watchman/watcher/eden.cpp @@ -652,7 +652,8 @@ std::vector globNameAndDType( const std::string& mountPoint, const std::vector& globPatterns, bool includeDotfiles, - bool splitGlobPattern = false) { + bool splitGlobPattern = false, + bool listOnlyFiles = false) { // TODO(xavierd): Once the config: "eden_split_glob_pattern" is rolled out // everywhere, remove this code. if (splitGlobPattern && globPatterns.size() > 1) { @@ -667,6 +668,7 @@ std::vector globNameAndDType( params.globs() = std::vector{globPattern}; params.includeDotfiles() = includeDotfiles; params.wantDtype() = true; + params.listOnlyFiles() = listOnlyFiles; params.sync() = getSyncBehavior(); globFutures.emplace_back( @@ -685,6 +687,7 @@ std::vector globNameAndDType( params.globs() = globPatterns; params.includeDotfiles() = includeDotfiles; params.wantDtype() = true; + params.listOnlyFiles() = listOnlyFiles; params.sync() = getSyncBehavior(); Glob glob; @@ -865,12 +868,18 @@ class EdenView final : public QueryableView { bool includeDir = true) const { auto client = getEdenClient(thriftChannel_); + bool listOnlyFiles = false; + if (ctx->query->expr) { + listOnlyFiles = + ctx->query->expr->listOnlyFiles() == QueryExpr::ReturnOnlyFiles::Yes; + } auto fileInfo = globNameAndDType( client.get(), mountPoint_, globStrings, includeDotfiles, - splitGlobPattern_); + splitGlobPattern_, + listOnlyFiles); // Filter out any ignored files filterOutPaths(fileInfo, ctx);