-
Notifications
You must be signed in to change notification settings - Fork 25k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a new
_ignored
meta field. (#29658)
This adds a new `_ignored` meta field which indexes and stores fields that have been ignored at index time because of the `ignore_malformed` option. It makes malformed documents easier to identify by using `exists` or `term(s)` queries on the `_ignored` field. Closes #29494
- Loading branch information
Showing
21 changed files
with
418 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
[[mapping-ignored-field]] | ||
=== `_ignored` field | ||
|
||
added[6.4.0] | ||
|
||
The `_ignored` field indexes and stores the names of every field in a document | ||
that has been ignored because it was malformed and | ||
<<ignore-malformed,`ignore_malformed`>> was turned on. | ||
|
||
This field is searchable with <<query-dsl-term-query,`term`>>, | ||
<<query-dsl-terms-query,`terms`>> and <<query-dsl-exists-query,`exists`>> | ||
queries, and is returned as part of the search hits. | ||
|
||
For instance, the query below matches all documents that have one or more fields | ||
that were ignored: | ||
|
||
[source,js] | ||
-------------------------------------------------- | ||
GET _search | ||
{ | ||
"query": { | ||
"exists": { | ||
"field": "_ignored" | ||
} | ||
} | ||
} | ||
-------------------------------------------------- | ||
// CONSOLE | ||
|
||
Similarly, the query below finds all documents whose `@timestamp` field was | ||
ignored at index time: | ||
|
||
[source,js] | ||
-------------------------------------------------- | ||
GET _search | ||
{ | ||
"query": { | ||
"term": { | ||
"_ignored": "@timestamp" | ||
} | ||
} | ||
} | ||
-------------------------------------------------- | ||
// CONSOLE | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
92 changes: 92 additions & 0 deletions
92
rest-api-spec/src/main/resources/rest-api-spec/test/search/200_ignore_malformed.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
---
# Shared fixture: an index with two fields that tolerate malformed values,
# populated with three documents covering each combination of bad input.
setup:
  - skip:
      version: " - 6.3.99"
      reason: _ignored was added in 6.4.0

  - do:
      indices.create:
        index: test
        body:
          mappings:
            _doc:
              properties:
                my_date:
                  type: date
                  ignore_malformed: true
                  store: true
                my_ip:
                  type: ip
                  ignore_malformed: true

  # Document 1: valid date, malformed IP.
  - do:
      index:
        index: test
        type: _doc
        id: 1
        body:
          my_date: "2018-05-11"
          my_ip: ":::1"

  # Document 2: malformed date, valid IP.
  - do:
      index:
        index: test
        type: _doc
        id: 2
        body:
          my_date: "bar"
          my_ip: "192.168.1.42"

  # Document 3: both fields malformed.
  - do:
      index:
        index: test
        type: _doc
        id: 3
        body:
          my_date: "bar"
          my_ip: "quux"

  - do:
      indices.refresh: {}

---
"Exists on _ignored":

  # Every document has at least one ignored field.
  - do:
      search:
        body:
          query:
            exists:
              field: "_ignored"

  - length: { hits.hits: 3 }

---
"Search on _ignored with term":

  # Only documents 2 and 3 have a malformed my_date.
  - do:
      search:
        body:
          query:
            term:
              _ignored: "my_date"

  - length: { hits.hits: 2 }

---
"Search on _ignored with terms":

  # Each document has my_date and/or my_ip ignored.
  - do:
      search:
        body:
          query:
            terms:
              _ignored:
                - "my_date"
                - "my_ip"

  - length: { hits.hits: 3 }

---
"_ignored is returned by default":

  # Document 3 had both of its fields ignored.
  - do:
      search:
        body:
          query:
            ids:
              values:
                - "3"

  - length: { hits.hits: 1 }
  - length: { hits.hits.0._ignored: 2 }

---
"_ignored is still returned with explicit list of stored fields":

  - do:
      search:
        stored_fields:
          - "my_date"
        body:
          query:
            ids:
              values:
                - "3"

  - length: { hits.hits: 1 }
  - is_true: hits.hits.0._ignored
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
154 changes: 154 additions & 0 deletions
154
server/src/main/java/org/elasticsearch/index/mapper/IgnoredFieldMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.document.Field; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.TermRangeQuery; | ||
import org.elasticsearch.common.lucene.Lucene; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.index.query.QueryShardContext; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* A field mapper that records fields that have been ignored because they were malformed. | ||
*/ | ||
public final class IgnoredFieldMapper extends MetadataFieldMapper { | ||
|
||
public static final String NAME = "_ignored"; | ||
|
||
public static final String CONTENT_TYPE = "_ignored"; | ||
|
||
public static class Defaults { | ||
public static final String NAME = IgnoredFieldMapper.NAME; | ||
|
||
public static final MappedFieldType FIELD_TYPE = new IgnoredFieldType(); | ||
|
||
static { | ||
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); | ||
FIELD_TYPE.setTokenized(false); | ||
FIELD_TYPE.setStored(true); | ||
FIELD_TYPE.setOmitNorms(true); | ||
FIELD_TYPE.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); | ||
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); | ||
FIELD_TYPE.setName(NAME); | ||
FIELD_TYPE.freeze(); | ||
} | ||
} | ||
|
||
public static class Builder extends MetadataFieldMapper.Builder<Builder, IgnoredFieldMapper> { | ||
|
||
public Builder(MappedFieldType existing) { | ||
super(Defaults.NAME, existing == null ? Defaults.FIELD_TYPE : existing, Defaults.FIELD_TYPE); | ||
} | ||
|
||
@Override | ||
public IgnoredFieldMapper build(BuilderContext context) { | ||
return new IgnoredFieldMapper(context.indexSettings()); | ||
} | ||
} | ||
|
||
public static class TypeParser implements MetadataFieldMapper.TypeParser { | ||
@Override | ||
public MetadataFieldMapper.Builder<?,?> parse(String name, Map<String, Object> node, | ||
ParserContext parserContext) throws MapperParsingException { | ||
return new Builder(parserContext.mapperService().fullName(NAME)); | ||
} | ||
|
||
@Override | ||
public MetadataFieldMapper getDefault(MappedFieldType fieldType, ParserContext context) { | ||
final Settings indexSettings = context.mapperService().getIndexSettings().getSettings(); | ||
return new IgnoredFieldMapper(indexSettings); | ||
} | ||
} | ||
|
||
public static final class IgnoredFieldType extends TermBasedFieldType { | ||
|
||
public IgnoredFieldType() { | ||
} | ||
|
||
protected IgnoredFieldType(IgnoredFieldType ref) { | ||
super(ref); | ||
} | ||
|
||
@Override | ||
public IgnoredFieldType clone() { | ||
return new IgnoredFieldType(this); | ||
} | ||
|
||
@Override | ||
public String typeName() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
@Override | ||
public Query existsQuery(QueryShardContext context) { | ||
// This query is not performance sensitive, it only helps assess | ||
// quality of the data, so we may use a slow query. It shouldn't | ||
// be too slow in practice since the number of unique terms in this | ||
// field is bounded by the number of fields in the mappings. | ||
return new TermRangeQuery(name(), null, null, true, true); | ||
} | ||
|
||
} | ||
|
||
private IgnoredFieldMapper(Settings indexSettings) { | ||
super(NAME, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE, indexSettings); | ||
} | ||
|
||
@Override | ||
public void preParse(ParseContext context) throws IOException { | ||
} | ||
|
||
@Override | ||
public void postParse(ParseContext context) throws IOException { | ||
super.parse(context); | ||
} | ||
|
||
@Override | ||
public Mapper parse(ParseContext context) throws IOException { | ||
// done in post-parse | ||
return null; | ||
} | ||
|
||
@Override | ||
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException { | ||
for (String field : context.getIgnoredFields()) { | ||
context.doc().add(new Field(NAME, field, fieldType())); | ||
} | ||
} | ||
|
||
@Override | ||
protected String contentType() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
@Override | ||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { | ||
return builder; | ||
} | ||
|
||
} |
Oops, something went wrong.