Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add enrich processor #41532

Merged
merged 9 commits into from
Apr 30, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
*/
public final class EnrichPolicy implements Writeable, ToXContentFragment {

static final String EXACT_MATCH_TYPE = "exact_match";
public static final String EXACT_MATCH_TYPE = "exact_match";
public static final String[] SUPPORTED_POLICY_TYPES = new String[]{EXACT_MATCH_TYPE};

static final ParseField TYPE = new ParseField("type");
Expand Down Expand Up @@ -125,6 +125,11 @@ public String getSchedule() {
return schedule;
}

/**
 * Returns the name of the alias that points to the enrich index for the given policy.
 *
 * @param policyName the name of the enrich policy
 * @return the alias name of the enrich index backing that policy
 */
public String getAliasName(String policyName) {
    // #41553 (list policy api) will add name to policy, so that we don't have to provide the name via a parameter.
    return new StringBuilder(".enrich-").append(policyName).toString();
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(type);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.enrich;

import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.rest.ESRestTestCase;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;

/**
 * REST-level integration test for the enrich processor: creates a policy, indexes a single
 * enrich document directly into the backing enrich index (SMILE-encoded), creates a pipeline
 * with an enrich processor, and verifies that an indexed document gets enriched.
 */
public class EnrichIT extends ESRestTestCase {

    // TODO: update this test when policy runner is ready
    public void testBasicFlow() throws Exception {
        // Create the policy:
        Request putPolicyRequest = new Request("PUT", "/_enrich/policy/my_policy");
        putPolicyRequest.setJsonEntity("{\"type\": \"exact_match\",\"index_pattern\": \"my-index*\", \"enrich_key\": \"host\", " +
            "\"enrich_values\": [\"globalRank\", \"tldRank\", \"tld\"], \"schedule\": \"0 5 * * *\"}");
        assertOK(client().performRequest(putPolicyRequest));

        // Add a single enrich document for now and then refresh:
        // (the document is written in SMILE because the processor reads the enrich index _source as SMILE)
        Request indexRequest = new Request("PUT", "/.enrich-my_policy/_doc/elastic.co");
        XContentBuilder document = XContentBuilder.builder(XContentType.SMILE.xContent());
        document.startObject();
        document.field("host", "elastic.co");
        document.field("globalRank", 25);
        document.field("tldRank", 7);
        document.field("tld", "co");
        document.endObject();
        document.close();
        ByteArrayOutputStream out = (ByteArrayOutputStream) document.getOutputStream();
        indexRequest.setEntity(new ByteArrayEntity(out.toByteArray(), ContentType.create("application/smile")));
        assertOK(client().performRequest(indexRequest));
        Request refreshRequest = new Request("POST", "/.enrich-my_policy/_refresh");
        assertOK(client().performRequest(refreshRequest));

        // Create pipeline
        Request putPipelineRequest = new Request("PUT", "/_ingest/pipeline/my_pipeline");
        putPipelineRequest.setJsonEntity("{\"processors\":[" +
            "{\"enrich\":{\"policy_name\":\"my_policy\",\"enrich_key\":\"host\",\"enrich_values\":[" +
            "{\"source\":\"globalRank\",\"target\":\"global_rank\"}," +
            "{\"source\":\"tldRank\",\"target\":\"tld_rank\"}" +
            "]}}" +
            "]}");
        assertOK(client().performRequest(putPipelineRequest));

        // Index document using pipeline with enrich processor:
        indexRequest = new Request("PUT", "/my-index/_doc/1");
        indexRequest.addParameter("pipeline", "my_pipeline");
        indexRequest.setJsonEntity("{\"host\": \"elastic.co\"}");
        assertOK(client().performRequest(indexRequest));

        // Check if document has been enriched
        Request getRequest = new Request("GET", "/my-index/_doc/1");
        Map<String, Object> response = toMap(client().performRequest(getRequest));
        Map<?, ?> source = (Map<?, ?>) response.get("_source");
        assertThat(source.size(), equalTo(3));
        assertThat(source.get("host"), equalTo("elastic.co"));
        assertThat(source.get("global_rank"), equalTo(25));
        assertThat(source.get("tld_rank"), equalTo(7));
    }

    /** Reads the full response body and parses the JSON into a map. */
    private static Map<String, Object> toMap(Response response) throws IOException {
        return toMap(EntityUtils.toString(response.getEntity()));
    }

    /** Parses a JSON string into an (ordered-as-parsed) map. */
    private static Map<String, Object> toMap(String response) {
        return XContentHelper.convertToMap(JsonXContent.jsonXContent, response, false);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
Expand All @@ -31,6 +33,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;

import static java.util.Collections.emptyList;
Expand All @@ -48,7 +51,14 @@ public EnrichPlugin(final Settings settings) {

@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final ClusterService clusterService = parameters.ingestService.getClusterService();
    // Pipelines are created from the cluster state update thread and calling ClusterService#state() from that thread is illegal
    // (because the current cluster state update is in progress).
    // So with the below atomic reference we keep track of the latest applied cluster state:
    AtomicReference<ClusterState> reference = new AtomicReference<>();
    clusterService.addStateApplier(event -> reference.set(event.state()));

    // Register the enrich processor factory, giving it access to the latest cluster state
    // (for policy lookups) and to a local shard searcher (for enrich index lookups):
    return Map.of(EnrichProcessorFactory.TYPE, new EnrichProcessorFactory(reference::get, parameters.localShardSearcher));
}

public List<ActionHandler<? extends ActionRequest, ? extends ActionResponse>> getActions() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.enrich;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;

final class EnrichProcessorFactory implements Processor.Factory {

static final String TYPE = "enrich";

private final Function<String, EnrichPolicy> policyLookup;
private final Function<String, Engine.Searcher> searchProvider;

EnrichProcessorFactory(Supplier<ClusterState> clusterStateSupplier,
Function<String, Engine.Searcher> searchProvider) {
this.policyLookup = policyName -> EnrichStore.getPolicy(policyName, clusterStateSupplier.get());
this.searchProvider = searchProvider;
}

@Override
public Processor create(Map<String, Processor.Factory> processorFactories, String tag, Map<String, Object> config) throws Exception {
String policyName = ConfigurationUtils.readStringProperty(TYPE, tag, config, "policy_name");
EnrichPolicy policy = policyLookup.apply(policyName);
if (policy == null) {
throw new IllegalArgumentException("policy [" + policyName + "] does not exists");
}

String enrichKey = ConfigurationUtils.readStringProperty(TYPE, tag, config, "enrich_key", policy.getEnrichKey());
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "ignore_missing", false);

final List<EnrichSpecification> specifications;
final List<Map<?, ?>> specificationConfig = ConfigurationUtils.readList(TYPE, tag, config, "enrich_values");
specifications = specificationConfig.stream()
// TODO: Add templating support in enrich_values source and target options
.map(entry -> new EnrichSpecification((String) entry.get("source"), (String) entry.get("target")))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will probably want to support templating on the target field. Probably out of scope for this review...maybe a TODO

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add todos and also add it as task to the meta issue.

.collect(Collectors.toList());

for (EnrichSpecification specification : specifications) {
if (policy.getEnrichValues().contains(specification.sourceField) == false) {
throw new IllegalArgumentException("source field [" + specification.sourceField + "] does not exist in policy [" +
policyName + "]");
}
}

switch (policy.getType()) {
case EnrichPolicy.EXACT_MATCH_TYPE:
return new ExactMatchProcessor(tag, policyLookup, searchProvider, policyName, enrichKey, ignoreMissing, specifications);
default:
throw new IllegalArgumentException("unsupported policy type [" + policy.getType() + "]");
}
}

static final class EnrichSpecification {

final String sourceField;
final String targetField;

EnrichSpecification(String sourceField, String targetField) {
this.sourceField = sourceField;
this.targetField = targetField;
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.enrich;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;
import org.elasticsearch.xpack.enrich.EnrichProcessorFactory.EnrichSpecification;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

final class ExactMatchProcessor extends AbstractProcessor {

private final Function<String, EnrichPolicy> policyLookup;
private final Function<String, Engine.Searcher> searchProvider;

private final String policyName;
private final String enrichKey;
private final boolean ignoreMissing;
private final List<EnrichSpecification> specifications;

ExactMatchProcessor(String tag,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we go ahead a create an AbstractEnrichProcessor and move some of this up a level ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets create it when we add a second concrete implementation? I think that then we have a better idea what logic should be reused.

Function<String, EnrichPolicy> policyLookup,
Function<String, Engine.Searcher> searchProvider,
String policyName,
String enrichKey,
boolean ignoreMissing,
List<EnrichSpecification> specifications) {
super(tag);
this.policyLookup = policyLookup;
this.searchProvider = searchProvider;
this.policyName = policyName;
this.enrichKey = enrichKey;
this.ignoreMissing = ignoreMissing;
this.specifications = specifications;
}

@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
final EnrichPolicy policy = policyLookup.apply(policyName);
if (policy == null) {
throw new IllegalArgumentException("policy [" + policyName + "] does not exists");
}

final String value = ingestDocument.getFieldValue(enrichKey, String.class, ignoreMissing);
if (value == null) {
return ingestDocument;
}

// TODO: re-use the engine searcher between enriching documents from the same write request
try (Engine.Searcher engineSearcher = searchProvider.apply(policy.getAliasName(policyName))) {
if (engineSearcher.getDirectoryReader().leaves().size() == 0) {
return ingestDocument;
} else if (engineSearcher.getDirectoryReader().leaves().size() != 1) {
throw new IllegalStateException("enrich index must have exactly a single segment");
}

final LeafReader leafReader = engineSearcher.getDirectoryReader().leaves().get(0).reader();
final Terms terms = leafReader.terms(policy.getEnrichKey());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this handle when the key is a dot representation of a nested object ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be able to handle field with dots. On the Lucene level the full path is used as field name. So if a key field is nested under object fields then this will work out.

Supporting nested field type (for objects under a json array) is a different story and this code doesn't support that. An enrich index with a nested field type is something that we should allow, because that has an overhead each time a document gets enriched. (a block join would need to be performed in addition to the term lookup that happens here). The policy runner should always built an enrich index that is optimized for speed. The source index can contain a nested field, the policy runner should then de-normalize the source of the document.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks I wasn't sure how this is represented at the Lucene layer.. TIL.

I added a line item to #32789 to ensure we don't forget out this requirement. (I added it under the processor, but it sounds more like the a concern of the synchronization)

if (terms == null) {
throw new IllegalStateException("enrich key field [" + policy.getEnrichKey() + "] does not exist");
}

final TermsEnum tenum = terms.iterator();
if (tenum.seekExact(new BytesRef(value))) {
PostingsEnum penum = tenum.postings(null, PostingsEnum.NONE);
final int docId = penum.nextDoc();
assert docId != PostingsEnum.NO_MORE_DOCS : "no matching doc id for [" + enrichKey + "]";
assert penum.nextDoc() == PostingsEnum.NO_MORE_DOCS : "more than one doc id matching for [" + enrichKey + "]";

// TODO: The use of _source is temporarily until enrich source field mapper has been added (see PR #41521)
Document document = leafReader.document(docId, Set.of(SourceFieldMapper.NAME));
BytesRef source = document.getBinaryValue(SourceFieldMapper.NAME);
assert source != null;

final BytesReference encoded = new BytesArray(source);
final Map<String, Object> decoded =
XContentHelper.convertToMap(encoded, false, XContentType.SMILE).v2();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This maybe a comment for the other PR ... but what are the SMILE binary format upgrade guarantees ? For example if we upgrade Jackson to a major version is the SMILE binary format guaranteed to be compatible ?

AFAIK this would be the first time we persist SMILE.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the format, doesn't change between jackson versions. (json format doesn't change either when jackson is upgraded). We use smile in quite a few changes, for example the cluster state is stored as smile in the data directory and users can send their data as smile to ES and then the _source field will store the documents in the SMILE format.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL (again) thanks!

for (EnrichSpecification specification : specifications) {
Object enrichValue = decoded.get(specification.sourceField);
// TODO: add support over overwrite option (like in SetProcessor)
ingestDocument.setFieldValue(specification.targetField, enrichValue);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this handle nested fields (both from the source values and ingestDocument target) ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same answer as here: #41532 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will probably want to support an override option like in the set processor. Maybe out of scope for this review...maybe a new TODO ?

}
}
}
return ingestDocument;
}

@Override
public String getType() {
return EnrichProcessorFactory.TYPE;
}

String getPolicyName() {
return policyName;
}

String getEnrichKey() {
return enrichKey;
}

boolean isIgnoreMissing() {
return ignoreMissing;
}

List<EnrichSpecification> getSpecifications() {
return specifications;
}
}
Loading