diff --git a/CHANGELOG.md b/CHANGELOG.md index fbfeadfa11..cc079a3c83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file based on the ### Backward Compatibility Breaks ### Bugfixes +- Characters "<" and ">" are now removed from any query term passed to [`Util::escapeTerm`](https://github.com/ruflin/Elastica/pull/1415/files). Since v5.1 the Elasticsearch [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_reserved_characters) states that these symbols can never be escaped. ### Added diff --git a/lib/Elastica/Util.php b/lib/Elastica/Util.php index 518116e691..6e5044634e 100644 --- a/lib/Elastica/Util.php +++ b/lib/Elastica/Util.php @@ -72,12 +72,12 @@ public static function escapeDateMath($requestUri) } /** - * Replace the following reserved words: AND OR NOT + * Replace known reserved words (e.g. AND OR NOT) * and - * escapes the following terms: + - && || ! ( ) { } [ ] ^ " ~ * ? : \. + * escape known special characters (e.g. + - && || ! ( ) { } [ ] ^ " ~ * ? : etc.)
* - * @link http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Boolean%20operators - * @link http://lucene.apache.org/java/2_4_0/queryparsersyntax.html#Escaping%20Special%20Characters + * @link https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_boolean_operators + * @link https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_reserved_characters * * @param string $term Query term to replace and escape * @@ -106,11 +106,19 @@ public static function escapeTerm($term) { $result = $term; // \ escaping has to be first, otherwise escaped later once again - $chars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/', '<', '>']; + $escapableChars = ['\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/']; - foreach ($chars as $char) { + foreach ($escapableChars as $char) { $result = str_replace($char, '\\'.$char, $result); } + + // < and > cannot be escaped at all, so they are stripped from the term instead of being backslash-prefixed + // @see https://www.elastic.co/guide/en/elasticsearch/reference/5.1/query-dsl-query-string-query.html#_reserved_characters + $nonEscapableChars = ['<', '>']; + + foreach ($nonEscapableChars as $char) { + $result = str_replace($char, '', $result); + } return $result; } diff --git a/test/Elastica/UtilTest.php b/test/Elastica/UtilTest.php index 97996b935d..fc52ad0079 100644 --- a/test/Elastica/UtilTest.php +++ b/test/Elastica/UtilTest.php @@ -109,7 +109,7 @@ public function getReplaceBooleanWordsPairs() public function testEscapeTermSpecialCharacters() { $before = '\\+-&&||!(){}[]^"~*?:/<>'; - $after = '\\\\\\+\\-\\&&\\||\\!\\(\\)\\{\\}\\[\\]\\^\\"\\~\\*\\?\\:\\/\<\>'; + $after = '\\\\\\+\\-\\&&\\||\\!\\(\\)\\{\\}\\[\\]\\^\\"\\~\\*\\?\\:\\/'; $this->assertEquals(Util::escapeTerm($before), $after); }