From 2ed5d6cf1bef84c56ed59f9c280bdd8485582ab2 Mon Sep 17 00:00:00 2001 From: Lizan Zhou Date: Wed, 19 Apr 2017 17:38:03 -0700 Subject: [PATCH] Refactor PathMatcher to an independent BUILD target (#251) * Templatize PathMatcher * make path_matcher.cc ready to move to header * Move path_matcher to independent build target * Remove utils from dep --- contrib/endpoints/src/api_manager/BUILD | 19 +- contrib/endpoints/src/api_manager/config.cc | 8 +- contrib/endpoints/src/api_manager/config.h | 10 +- .../endpoints/src/api_manager/context/BUILD | 1 + .../endpoints/src/api_manager/path_matcher.cc | 384 ---------------- .../endpoints/src/api_manager/path_matcher.h | 409 +++++++++++++++++- .../src/api_manager/path_matcher_test.cc | 4 +- 7 files changed, 416 insertions(+), 419 deletions(-) delete mode 100644 contrib/endpoints/src/api_manager/path_matcher.cc diff --git a/contrib/endpoints/src/api_manager/BUILD b/contrib/endpoints/src/api_manager/BUILD index 48465a163876..088019ead51d 100644 --- a/contrib/endpoints/src/api_manager/BUILD +++ b/contrib/endpoints/src/api_manager/BUILD @@ -52,8 +52,6 @@ cc_library( "config.h", "gce_metadata.h", "method_impl.h", - "path_matcher.h", - "path_matcher_node.h", "request_handler.h", ], deps = [ @@ -77,8 +75,6 @@ cc_library( "gce_metadata.cc", "http_template.h", "method_impl.cc", - "path_matcher.cc", - "path_matcher_node.cc", "quota_control.cc", "quota_control.h", "request_handler.cc", @@ -96,6 +92,7 @@ cc_library( deps = [ ":auth_headers", ":http_template", + ":path_matcher", ":impl_headers", ":server_config_proto", "//contrib/endpoints/src/api_manager/auth", @@ -114,6 +111,20 @@ cc_library( ], ) +cc_library( + name = "path_matcher", + srcs = [ + "path_matcher_node.cc", + "path_matcher_node.h", + ], + hdrs = [ + "path_matcher.h", + ], + deps = [ + ":http_template", + ], +) + cc_library( name = "http_template", srcs = [ diff --git a/contrib/endpoints/src/api_manager/config.cc b/contrib/endpoints/src/api_manager/config.cc index 
9a60516dc5ae..52152b901acd 100644 --- a/contrib/endpoints/src/api_manager/config.cc +++ b/contrib/endpoints/src/api_manager/config.cc @@ -131,7 +131,7 @@ bool Config::LoadQuotaRule(ApiManagerEnvInterface *env) { } bool Config::LoadHttpMethods(ApiManagerEnvInterface *env, - PathMatcherBuilder *pmb) { + PathMatcherBuilder *pmb) { std::set all_urls, urls_with_options; // By default, allow_cors is false. This means that the default behavior // of ESP is to reject all "OPTIONS" requests. If customers want to enable @@ -210,7 +210,7 @@ bool Config::LoadHttpMethods(ApiManagerEnvInterface *env, } bool Config::AddOptionsMethodForAllUrls(ApiManagerEnvInterface *env, - PathMatcherBuilder *pmb, + PathMatcherBuilder *pmb, const std::set &all_urls) { // In order to support CORS. Http method OPTIONS needs to be added to // the path_matcher for all urls except the ones already with options. @@ -242,7 +242,7 @@ bool Config::AddOptionsMethodForAllUrls(ApiManagerEnvInterface *env, } bool Config::LoadRpcMethods(ApiManagerEnvInterface *env, - PathMatcherBuilder *pmb) { + PathMatcherBuilder *pmb) { for (const auto &api : service_.apis()) { if (api.name().empty()) { continue; @@ -439,7 +439,7 @@ std::unique_ptr Config::Create(ApiManagerEnvInterface *env, return nullptr; } config->LoadServerConfig(env, server_config); - PathMatcherBuilder pmb; + PathMatcherBuilder pmb; // Load apis before http rules to store API versions if (!config->LoadRpcMethods(env, &pmb)) { return nullptr; diff --git a/contrib/endpoints/src/api_manager/config.h b/contrib/endpoints/src/api_manager/config.h index bdff7e088265..a9dd040f0d6d 100644 --- a/contrib/endpoints/src/api_manager/config.h +++ b/contrib/endpoints/src/api_manager/config.h @@ -93,15 +93,17 @@ class Config { const std::string &server_config); // Create MethodInfo for HTTP methods, register them to PathMatcher. 
- bool LoadHttpMethods(ApiManagerEnvInterface *env, PathMatcherBuilder *pmb); + bool LoadHttpMethods(ApiManagerEnvInterface *env, + PathMatcherBuilder *pmb); // Add a special option method info for all URLs to support CORS. bool AddOptionsMethodForAllUrls(ApiManagerEnvInterface *env, - PathMatcherBuilder *pmb, + PathMatcherBuilder *pmb, const std::set &all_urls); // Create MethodInfo for RPC methods, register them to PathMatcher. - bool LoadRpcMethods(ApiManagerEnvInterface *env, PathMatcherBuilder *pmb); + bool LoadRpcMethods(ApiManagerEnvInterface *env, + PathMatcherBuilder *pmb); // Load Authentication info to MethodInfo. bool LoadAuthentication(ApiManagerEnvInterface *env); @@ -124,7 +126,7 @@ class Config { ::google::api::Service service_; std::unique_ptr server_config_; - PathMatcherPtr path_matcher_; + PathMatcherPtr path_matcher_; std::map method_map_; // Maps issuer to {jwksUri, openIdValid} pair. // jwksUri is populated either from service config, or by openId discovery. diff --git a/contrib/endpoints/src/api_manager/context/BUILD b/contrib/endpoints/src/api_manager/context/BUILD index c1a5d0113fdd..c51fdae921a8 100644 --- a/contrib/endpoints/src/api_manager/context/BUILD +++ b/contrib/endpoints/src/api_manager/context/BUILD @@ -35,6 +35,7 @@ cc_library( }), deps = [ "//contrib/endpoints/src/api_manager:http_template", + "//contrib/endpoints/src/api_manager:path_matcher", "//contrib/endpoints/src/api_manager:impl_headers", "//contrib/endpoints/src/api_manager:server_config_proto", "//contrib/endpoints/src/api_manager/auth", diff --git a/contrib/endpoints/src/api_manager/path_matcher.cc b/contrib/endpoints/src/api_manager/path_matcher.cc deleted file mode 100644 index 6dd785035050..000000000000 --- a/contrib/endpoints/src/api_manager/path_matcher.cc +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright 2016 Google Inc. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//////////////////////////////////////////////////////////////////////////////// -// -#include "contrib/endpoints/src/api_manager/path_matcher.h" - -#include "contrib/endpoints/include/api_manager/method.h" -#include "contrib/endpoints/include/api_manager/method_call_info.h" -#include "contrib/endpoints/src/api_manager/http_template.h" - -#include -#include -#include -#include - -using std::string; -using std::vector; - -namespace google { -namespace api_manager { - -namespace { - -const char kDefaultServiceName[] = "Default"; - -// Converts a request path into a format that can be used to perform a request -// lookup in the PathMatcher trie. This utility method sanitizes the request -// path and then splits the path into slash separated parts. Returns an empty -// vector if the sanitized path is "/". -// -// custom_verbs is a set of configured custom verbs that are used to match -// against any custom verbs in request path. If the request_path contains a -// custom verb not found in custom_verbs, it is treated as a part of the path. -// -// - Strips off query string: "/a?foo=bar" --> "/a" -// - Collapses extra slashes: "///" --> "/" -vector ExtractRequestParts(string req_path); - -// Looks up on a PathMatcherNode. 
-PathMatcherLookupResult LookupInPathMatcherNode(const PathMatcherNode& root, - const vector& parts, - const HttpMethod& http_method); - -PathMatcherNode::PathInfo TransformHttpTemplate(const HttpTemplate& ht); - -std::vector& split(const std::string& s, char delim, - std::vector& elems) { - std::stringstream ss(s); - std::string item; - while (std::getline(ss, item, delim)) { - elems.push_back(item); - } - return elems; -} - -inline bool IsReservedChar(char c) { - // Reserved characters according to RFC 6570 - switch (c) { - case '!': - case '#': - case '$': - case '&': - case '\'': - case '(': - case ')': - case '*': - case '+': - case ',': - case '/': - case ':': - case ';': - case '=': - case '?': - case '@': - case '[': - case ']': - return true; - default: - return false; - } -} - -// Check if an ASCII character is a hex digit. We can't use ctype's -// isxdigit() because it is affected by locale. This function is applied -// to the escaped characters in a url, not to natural-language -// strings, so locale should not be taken into account. -inline bool ascii_isxdigit(char c) { - return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || - ('0' <= c && c <= '9'); -} - -inline int hex_digit_to_int(char c) { - /* Assume ASCII. */ - int x = static_cast(c); - if (x > '9') { - x += 9; - } - return x & 0xf; -} - -// This is a helper function for UrlUnescapeString. It takes a string and -// the index of where we are within that string. -// -// The function returns true if the next three characters are of the format: -// "%[0-9A-Fa-f]{2}". -// -// If the next three characters are an escaped character then this function will -// also return what character is escaped. 
-bool GetEscapedChar(const string& src, size_t i, bool unescape_reserved_chars, - char* out) { - if (i + 2 < src.size() && src[i] == '%') { - if (ascii_isxdigit(src[i + 1]) && ascii_isxdigit(src[i + 2])) { - char c = - (hex_digit_to_int(src[i + 1]) << 4) | hex_digit_to_int(src[i + 2]); - if (!unescape_reserved_chars && IsReservedChar(c)) { - return false; - } - *out = c; - return true; - } - } - return false; -} - -// Unescapes string 'part' and returns the unescaped string. Reserved characters -// (as specified in RFC 6570) are not escaped if unescape_reserved_chars is -// false. -std::string UrlUnescapeString(const std::string& part, - bool unescape_reserved_chars) { - std::string unescaped; - // Check whether we need to escape at all. - bool needs_unescaping = false; - char ch = '\0'; - for (size_t i = 0; i < part.size(); ++i) { - if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) { - needs_unescaping = true; - break; - } - } - if (!needs_unescaping) { - unescaped = part; - return unescaped; - } - - unescaped.resize(part.size()); - - char* begin = &(unescaped)[0]; - char* p = begin; - - for (size_t i = 0; i < part.size();) { - if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) { - *p++ = ch; - i += 3; - } else { - *p++ = part[i]; - i += 1; - } - } - - unescaped.resize(p - begin); - return unescaped; -} - -void ExtractBindingsFromPath(const std::vector& vars, - const std::vector& parts, - std::vector* bindings) { - for (const auto& var : vars) { - // Determine the subpath bound to the variable based on the - // [start_segment, end_segment) segment range of the variable. - // - // In case of matching "**" - end_segment is negative and is relative to - // the end such that end_segment = -1 will match all subsequent segments. - VariableBinding binding; - binding.field_path = var.field_path; - // Calculate the absolute index of the ending segment in case it's negative. - size_t end_segment = (var.end_segment >= 0) - ? 
var.end_segment - : parts.size() + var.end_segment + 1; - // It is multi-part match if we have more than one segment. We also make - // sure that a single URL segment match with ** is also considered a - // multi-part match by checking if it->second.end_segment is negative. - bool is_multipart = - (end_segment - var.start_segment) > 1 || var.end_segment < 0; - // Joins parts with "/" to form a path string. - for (size_t i = var.start_segment; i < end_segment; ++i) { - // For multipart matches only unescape non-reserved characters. - binding.value += UrlUnescapeString(parts[i], !is_multipart); - if (i < end_segment - 1) { - binding.value += "/"; - } - } - bindings->emplace_back(binding); - } -} - -void ExtractBindingsFromQueryParameters( - const std::string& query_params, const std::set& system_params, - std::vector* bindings) { - // The bindings in URL the query parameters have the following form: - // =value1&=value2&...&=valueN - // Query parameters may also contain system parameters such as `api_key`. - // We'll need to ignore these. Example: - // book.id=123&book.author=Neal%20Stephenson&api_key=AIzaSyAz7fhBkC35D2M - vector params; - split(query_params, '&', params); - for (const auto& param : params) { - size_t pos = param.find('='); - if (pos != 0 && pos != std::string::npos) { - auto name = param.substr(0, pos); - // Make sure the query parameter is not a system parameter (e.g. - // `api_key`) before adding the binding. - if (system_params.find(name) == std::end(system_params)) { - // The name of the parameter is a field path, which is a dot-delimited - // sequence of field names that identify the (potentially deep) field - // in the request, e.g. `book.author.name`. 
- VariableBinding binding; - split(name, '.', binding.field_path); - binding.value = UrlUnescapeString(param.substr(pos + 1), true); - bindings->emplace_back(std::move(binding)); - } - } - } -} - -} // namespace - -PathMatcher::PathMatcher(PathMatcherBuilder&& builder) - : root_ptr_(std::move(builder.root_ptr_)), - custom_verbs_(std::move(builder.custom_verbs_)), - methods_(std::move(builder.methods_)) {} - -// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper -// splits the request path into slash-separated path parts. Next, the method -// checks that the |http_method| is supported. If not, then it returns an empty -// WrapperGraph::SharedPtr. Next, this method invokes the node's Lookup on -// the extracted |parts|. Finally, it fills the mapping from variables to their -// values parsed from the path. -// TODO: cache results by adding get/put methods here (if profiling reveals -// benefit) -MethodInfo* PathMatcher::Lookup(const string& http_method, const string& url, - const string& query_params, - std::vector* variable_bindings, - std::string* body_field_path) const { - const vector parts = ExtractRequestParts(url); - - // If service_name has not been registered to ESP and strict_service_matching_ - // is set to false, tries to lookup the method in all registered services. - if (root_ptr_ == nullptr) { - return nullptr; - } - - PathMatcherLookupResult lookup_result = - LookupInPathMatcherNode(*root_ptr_, parts, http_method); - // Return nullptr if nothing is found or the result is marked for duplication. 
- if (lookup_result.data == nullptr || lookup_result.is_multiple) { - return nullptr; - } - MethodData* method_data = reinterpret_cast(lookup_result.data); - if (variable_bindings != nullptr) { - variable_bindings->clear(); - ExtractBindingsFromPath(method_data->variables, parts, variable_bindings); - ExtractBindingsFromQueryParameters( - query_params, method_data->method->system_query_parameter_names(), - variable_bindings); - } - if (body_field_path != nullptr) { - *body_field_path = method_data->body_field_path; - } - return method_data->method; -} - -MethodInfo* PathMatcher::Lookup(const string& http_method, - const string& path) const { - return Lookup(http_method, path, string(), nullptr, nullptr); -} - -// Initializes the builder with a root Path Segment -PathMatcherBuilder::PathMatcherBuilder() : root_ptr_(new PathMatcherNode()) {} - -PathMatcherPtr PathMatcherBuilder::Build() { - return PathMatcherPtr(new PathMatcher(std::move(*this))); -} - -void PathMatcherBuilder::InsertPathToNode(const PathMatcherNode::PathInfo& path, - void* method_data, - std::string http_method, - bool mark_duplicates, - PathMatcherNode* root_ptr) { - if (root_ptr->InsertPath(path, http_method, method_data, mark_duplicates)) { - // VLOG(3) << "Registered WrapperGraph for " << - // http_template.as_string(); - } else { - // VLOG(3) << "Replaced WrapperGraph for " << http_template.as_string(); - } -} - -// This wrapper converts the |http_rule| into a HttpTemplate. Then, inserts the -// template into the trie. -bool PathMatcherBuilder::Register(string http_method, string http_template, - string body_field_path, MethodInfo* method) { - std::unique_ptr ht(HttpTemplate::Parse(http_template)); - if (nullptr == ht) { - return false; - } - PathMatcherNode::PathInfo path_info = TransformHttpTemplate(*ht); - if (path_info.path_info().size() == 0) { - return false; - } - // Create & initialize a MethodData struct. Then insert its pointer - // into the path matcher trie. 
- auto method_data = std::unique_ptr(new MethodData()); - method_data->method = method; - method_data->variables = std::move(ht->Variables()); - method_data->body_field_path = std::move(body_field_path); - - InsertPathToNode(path_info, method_data.get(), http_method, true, - root_ptr_.get()); - // Add the method_data to the methods_ vector for cleanup - methods_.emplace_back(std::move(method_data)); - return true; -} - -namespace { - -vector ExtractRequestParts(string path) { - // Remove query parameters. - path = path.substr(0, path.find_first_of('?')); - - // Replace last ':' with '/' to handle custom verb. - // But not for /foo:bar/const. - std::size_t last_colon_pos = path.find_last_of(':'); - std::size_t last_slash_pos = path.find_last_of('/'); - if (last_colon_pos != std::string::npos && last_colon_pos > last_slash_pos) { - path[last_colon_pos] = '/'; - } - - vector result; - if (path.size() > 0) { - split(path.substr(1), '/', result); - } - // Removes all trailing empty parts caused by extra "/". 
- while (!result.empty() && (*(--result.end())).empty()) { - result.pop_back(); - } - return result; -} - -PathMatcherLookupResult LookupInPathMatcherNode(const PathMatcherNode& root, - const vector& parts, - const HttpMethod& http_method) { - PathMatcherLookupResult result; - root.LookupPath(parts.begin(), parts.end(), http_method, &result); - return result; -} - -PathMatcherNode::PathInfo TransformHttpTemplate(const HttpTemplate& ht) { - PathMatcherNode::PathInfo::Builder builder; - - for (const string& part : ht.segments()) { - builder.AppendLiteralNode(part); - } - if (!ht.verb().empty()) { - builder.AppendLiteralNode(ht.verb()); - } - - return builder.Build(); -} - -} // namespace - -} // namespace api_manager -} // namespace google diff --git a/contrib/endpoints/src/api_manager/path_matcher.h b/contrib/endpoints/src/api_manager/path_matcher.h index 420f8cb0f915..1a9a7c69feb5 100644 --- a/contrib/endpoints/src/api_manager/path_matcher.h +++ b/contrib/endpoints/src/api_manager/path_matcher.h @@ -15,20 +15,20 @@ #ifndef API_MANAGER_PATH_MATCHER_H_ #define API_MANAGER_PATH_MATCHER_H_ -#include +#include #include #include +#include #include #include -#include "contrib/endpoints/include/api_manager/method.h" -#include "contrib/endpoints/include/api_manager/method_call_info.h" #include "contrib/endpoints/src/api_manager/http_template.h" #include "contrib/endpoints/src/api_manager/path_matcher_node.h" namespace google { namespace api_manager { +template class PathMatcherBuilder; // required for PathMatcher constructor // The immutable, thread safe PathMatcher stores a mapping from a combination of @@ -41,28 +41,30 @@ class PathMatcherBuilder; // required for PathMatcher constructor // 1) building the PathMatcher: // PathMatcherBuilder builder(false); // for each (service_name, http_method, url_path, associated method) -// builder.register(service_name, http_method, url_path, datat); +// builder.register(service_name, http_method, url_path, data); // PathMater matcher 
= builder.Build(); // 2) lookup: // MethodInfo * method = matcher.Lookup(service_name, http_method, // url_path); // if (method == nullptr) failed to find it. // +template class PathMatcher { public: ~PathMatcher(){}; - MethodInfo *Lookup(const std::string &http_method, const std::string &path, - const std::string &query_params, - std::vector *variable_bindings, - std::string *body_field_path) const; + // TODO: Do not template VariableBinding + template + Method Lookup(const std::string& http_method, const std::string& path, + const std::string& query_params, + std::vector* variable_bindings, + std::string* body_field_path) const; - MethodInfo *Lookup(const std::string &http_method, - const std::string &path) const; + Method Lookup(const std::string& http_method, const std::string& path) const; private: // Creates a Path Matcher with a Builder by moving the builder's root node. - explicit PathMatcher(PathMatcherBuilder &&builder); + explicit PathMatcher(PathMatcherBuilder&& builder); // A root node shared by all services, i.e. paths of all services will be // registered to this node. @@ -71,7 +73,7 @@ class PathMatcher { std::set custom_verbs_; // Data we store per each registered method struct MethodData { - MethodInfo *method; + Method method; std::vector variables; std::string body_field_path; }; @@ -80,15 +82,17 @@ class PathMatcher { std::vector> methods_; private: - friend class PathMatcherBuilder; + friend class PathMatcherBuilder; }; -typedef std::unique_ptr PathMatcherPtr; +template +using PathMatcherPtr = std::unique_ptr>; // This PathMatcherBuilder is used to register path-WrapperGraph pairs and // instantiate an immutable, thread safe PathMatcher. // // The PathMatcherBuilder itself is NOT THREAD SAFE. +template class PathMatcherBuilder { public: PathMatcherBuilder(); @@ -100,18 +104,18 @@ class PathMatcherBuilder { // replaces the existing method. Only the last registered method is stored. // Return false if path is an invalid http template. 
bool Register(std::string http_method, std::string path, - std::string body_field_path, MethodInfo *method); + std::string body_field_path, Method method); // Returns a unique_ptr to a thread safe PathMatcher that contains all // registered path-WrapperGraph pairs. Note the PathMatchBuilder instance // will be moved so cannot use after invoking Build(). - PathMatcherPtr Build(); + PathMatcherPtr Build(); private: // Inserts a path to a PathMatcherNode. - void InsertPathToNode(const PathMatcherNode::PathInfo &path, - void *method_data, std::string http_method, - bool mark_duplicates, PathMatcherNode *root_ptr); + void InsertPathToNode(const PathMatcherNode::PathInfo& path, + void* method_data, std::string http_method, + bool mark_duplicates, PathMatcherNode* root_ptr); // A root node shared by all services, i.e. paths of all services will be // registered to this node. std::unique_ptr root_ptr_; @@ -120,12 +124,375 @@ class PathMatcherBuilder { // be multiple templates in different services on a server. Consider moving // this to PathMatcherNode. std::set custom_verbs_; - typedef PathMatcher::MethodData MethodData; + typedef typename PathMatcher::MethodData MethodData; std::vector> methods_; - friend class PathMatcher; + friend class PathMatcher; }; +namespace { + +std::vector& split(const std::string& s, char delim, + std::vector& elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; +} + +inline bool IsReservedChar(char c) { + // Reserved characters according to RFC 6570 + switch (c) { + case '!': + case '#': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case '/': + case ':': + case ';': + case '=': + case '?': + case '@': + case '[': + case ']': + return true; + default: + return false; + } +} + +// Check if an ASCII character is a hex digit. We can't use ctype's +// isxdigit() because it is affected by locale. 
This function is applied +// to the escaped characters in a url, not to natural-language +// strings, so locale should not be taken into account. +inline bool ascii_isxdigit(char c) { + return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || + ('0' <= c && c <= '9'); +} + +inline int hex_digit_to_int(char c) { + /* Assume ASCII. */ + int x = static_cast<int>(c); + if (x > '9') { + x += 9; + } + return x & 0xf; +} + +// This is a helper function for UrlUnescapeString. It takes a string and +// the index of where we are within that string. +// +// The function returns true if the next three characters are of the format: +// "%[0-9A-Fa-f]{2}". +// +// If the next three characters are an escaped character then this function will +// also return what character is escaped. +bool GetEscapedChar(const std::string& src, size_t i, + bool unescape_reserved_chars, char* out) { + if (i + 2 < src.size() && src[i] == '%') { + if (ascii_isxdigit(src[i + 1]) && ascii_isxdigit(src[i + 2])) { + char c = + (hex_digit_to_int(src[i + 1]) << 4) | hex_digit_to_int(src[i + 2]); + if (!unescape_reserved_chars && IsReservedChar(c)) { + return false; + } + *out = c; + return true; + } + } + return false; +} + +// Unescapes string 'part' and returns the unescaped string. Reserved characters +// (as specified in RFC 6570) are not unescaped if unescape_reserved_chars is +// false. +std::string UrlUnescapeString(const std::string& part, + bool unescape_reserved_chars) { + std::string unescaped; + // Check whether we need to unescape at all. 
+ bool needs_unescaping = false; + char ch = '\0'; + for (size_t i = 0; i < part.size(); ++i) { + if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) { + needs_unescaping = true; + break; + } + } + if (!needs_unescaping) { + unescaped = part; + return unescaped; + } + + unescaped.resize(part.size()); + + char* begin = &(unescaped)[0]; + char* p = begin; + + for (size_t i = 0; i < part.size();) { + if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) { + *p++ = ch; + i += 3; + } else { + *p++ = part[i]; + i += 1; + } + } + + unescaped.resize(p - begin); + return unescaped; +} + +template +void ExtractBindingsFromPath(const std::vector& vars, + const std::vector& parts, + std::vector* bindings) { + for (const auto& var : vars) { + // Determine the subpath bound to the variable based on the + // [start_segment, end_segment) segment range of the variable. + // + // In case of matching "**" - end_segment is negative and is relative to + // the end such that end_segment = -1 will match all subsequent segments. + VariableBinding binding; + binding.field_path = var.field_path; + // Calculate the absolute index of the ending segment in case it's negative. + size_t end_segment = (var.end_segment >= 0) + ? var.end_segment + : parts.size() + var.end_segment + 1; + // It is multi-part match if we have more than one segment. We also make + // sure that a single URL segment match with ** is also considered a + // multi-part match by checking if it->second.end_segment is negative. + bool is_multipart = + (end_segment - var.start_segment) > 1 || var.end_segment < 0; + // Joins parts with "/" to form a path string. + for (size_t i = var.start_segment; i < end_segment; ++i) { + // For multipart matches only unescape non-reserved characters. 
+ binding.value += UrlUnescapeString(parts[i], !is_multipart); + if (i < end_segment - 1) { + binding.value += "/"; + } + } + bindings->emplace_back(binding); + } +} + +template +void ExtractBindingsFromQueryParameters( + const std::string& query_params, const std::set& system_params, + std::vector* bindings) { + // The bindings in URL the query parameters have the following form: + // =value1&=value2&...&=valueN + // Query parameters may also contain system parameters such as `api_key`. + // We'll need to ignore these. Example: + // book.id=123&book.author=Neal%20Stephenson&api_key=AIzaSyAz7fhBkC35D2M + std::vector params; + split(query_params, '&', params); + for (const auto& param : params) { + size_t pos = param.find('='); + if (pos != 0 && pos != std::string::npos) { + auto name = param.substr(0, pos); + // Make sure the query parameter is not a system parameter (e.g. + // `api_key`) before adding the binding. + if (system_params.find(name) == std::end(system_params)) { + // The name of the parameter is a field path, which is a dot-delimited + // sequence of field names that identify the (potentially deep) field + // in the request, e.g. `book.author.name`. + VariableBinding binding; + split(name, '.', binding.field_path); + binding.value = UrlUnescapeString(param.substr(pos + 1), true); + bindings->emplace_back(std::move(binding)); + } + } + } +} + +// Converts a request path into a format that can be used to perform a request +// lookup in the PathMatcher trie. This utility method sanitizes the request +// path and then splits the path into slash separated parts. Returns an empty +// vector if the sanitized path is "/". +// +// custom_verbs is a set of configured custom verbs that are used to match +// against any custom verbs in request path. If the request_path contains a +// custom verb not found in custom_verbs, it is treated as a part of the path. 
+// +// - Strips off query string: "/a?foo=bar" --> "/a" +// - Collapses extra slashes: "///" --> "/" +std::vector ExtractRequestParts(std::string path) { + // Remove query parameters. + path = path.substr(0, path.find_first_of('?')); + + // Replace last ':' with '/' to handle custom verb. + // But not for /foo:bar/const. + std::size_t last_colon_pos = path.find_last_of(':'); + std::size_t last_slash_pos = path.find_last_of('/'); + if (last_colon_pos != std::string::npos && last_colon_pos > last_slash_pos) { + path[last_colon_pos] = '/'; + } + + std::vector result; + if (path.size() > 0) { + split(path.substr(1), '/', result); + } + // Removes all trailing empty parts caused by extra "/". + while (!result.empty() && (*(--result.end())).empty()) { + result.pop_back(); + } + return result; +} + +// Looks up on a PathMatcherNode. +PathMatcherLookupResult LookupInPathMatcherNode( + const PathMatcherNode& root, const std::vector& parts, + const HttpMethod& http_method) { + PathMatcherLookupResult result; + root.LookupPath(parts.begin(), parts.end(), http_method, &result); + return result; +} + +PathMatcherNode::PathInfo TransformHttpTemplate(const HttpTemplate& ht) { + PathMatcherNode::PathInfo::Builder builder; + + for (const std::string& part : ht.segments()) { + builder.AppendLiteralNode(part); + } + if (!ht.verb().empty()) { + builder.AppendLiteralNode(ht.verb()); + } + + return builder.Build(); +} + +} // namespace + +template +PathMatcher::PathMatcher(PathMatcherBuilder&& builder) + : root_ptr_(std::move(builder.root_ptr_)), + custom_verbs_(std::move(builder.custom_verbs_)), + methods_(std::move(builder.methods_)) {} + +// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper +// splits the request path into slash-separated path parts. Next, the method +// checks that the |http_method| is supported. If not, then it returns an empty +// WrapperGraph::SharedPtr. Next, this method invokes the node's Lookup on +// the extracted |parts|. 
Finally, it fills the mapping from variables to their +// values parsed from the path. +// TODO: cache results by adding get/put methods here (if profiling reveals +// benefit) +template +template +Method PathMatcher::Lookup( + const std::string& http_method, const std::string& path, + const std::string& query_params, + std::vector* variable_bindings, + std::string* body_field_path) const { + const std::vector parts = ExtractRequestParts(path); + + // If service_name has not been registered to ESP and strict_service_matching_ + // is set to false, tries to lookup the method in all registered services. + if (root_ptr_ == nullptr) { + return nullptr; + } + + PathMatcherLookupResult lookup_result = + LookupInPathMatcherNode(*root_ptr_, parts, http_method); + // Return nullptr if nothing is found or the result is marked for duplication. + if (lookup_result.data == nullptr || lookup_result.is_multiple) { + return nullptr; + } + MethodData* method_data = reinterpret_cast(lookup_result.data); + if (variable_bindings != nullptr) { + variable_bindings->clear(); + ExtractBindingsFromPath(method_data->variables, parts, variable_bindings); + ExtractBindingsFromQueryParameters( + query_params, method_data->method->system_query_parameter_names(), + variable_bindings); + } + if (body_field_path != nullptr) { + *body_field_path = method_data->body_field_path; + } + return method_data->method; +} + +// TODO: refactor common code with method above +template +Method PathMatcher::Lookup(const std::string& http_method, + const std::string& path) const { + const std::vector parts = ExtractRequestParts(path); + + // If service_name has not been registered to ESP and strict_service_matching_ + // is set to false, tries to lookup the method in all registered services. 
+ if (root_ptr_ == nullptr) { + return nullptr; + } + + PathMatcherLookupResult lookup_result = + LookupInPathMatcherNode(*root_ptr_, parts, http_method); + // Return nullptr if nothing is found or the result is marked for duplication. + if (lookup_result.data == nullptr || lookup_result.is_multiple) { + return nullptr; + } + MethodData* method_data = reinterpret_cast(lookup_result.data); + return method_data->method; +} + +// Initializes the builder with a root Path Segment +template +PathMatcherBuilder::PathMatcherBuilder() + : root_ptr_(new PathMatcherNode()) {} + +template +PathMatcherPtr PathMatcherBuilder::Build() { + return PathMatcherPtr(new PathMatcher(std::move(*this))); +} + +template +void PathMatcherBuilder::InsertPathToNode( + const PathMatcherNode::PathInfo& path, void* method_data, + std::string http_method, bool mark_duplicates, PathMatcherNode* root_ptr) { + if (root_ptr->InsertPath(path, http_method, method_data, mark_duplicates)) { + // VLOG(3) << "Registered WrapperGraph for " << + // http_template.as_string(); + } else { + // VLOG(3) << "Replaced WrapperGraph for " << http_template.as_string(); + } +} + +// This wrapper converts the |http_rule| into a HttpTemplate. Then, inserts the +// template into the trie. +template +bool PathMatcherBuilder::Register(std::string http_method, + std::string http_template, + std::string body_field_path, + Method method) { + std::unique_ptr ht(HttpTemplate::Parse(http_template)); + if (nullptr == ht) { + return false; + } + PathMatcherNode::PathInfo path_info = TransformHttpTemplate(*ht); + if (path_info.path_info().size() == 0) { + return false; + } + // Create & initialize a MethodData struct. Then insert its pointer + // into the path matcher trie. 
+ auto method_data = std::unique_ptr(new MethodData()); + method_data->method = method; + method_data->variables = std::move(ht->Variables()); + method_data->body_field_path = std::move(body_field_path); + + InsertPathToNode(path_info, method_data.get(), http_method, true, + root_ptr_.get()); + // Add the method_data to the methods_ vector for cleanup + methods_.emplace_back(std::move(method_data)); + return true; +} + } // namespace api_manager } // namespace google diff --git a/contrib/endpoints/src/api_manager/path_matcher_test.cc b/contrib/endpoints/src/api_manager/path_matcher_test.cc index b92fd645f532..cc04d399a652 100644 --- a/contrib/endpoints/src/api_manager/path_matcher_test.cc +++ b/contrib/endpoints/src/api_manager/path_matcher_test.cc @@ -140,8 +140,8 @@ class PathMatcherTest : public ::testing::Test { } private: - PathMatcherBuilder builder_; - PathMatcherPtr matcher_; + PathMatcherBuilder builder_; + PathMatcherPtr matcher_; std::vector stored_methods_; std::set empty_set_; };