From a20084c2442ab4fe18c9ae907b1638d9c585c49d Mon Sep 17 00:00:00 2001 From: jonhehir Date: Tue, 9 Jun 2020 18:57:06 -0400 Subject: [PATCH] fix: Added role_name to default user query in Neo4jSearchDataExtractor (#285) --- .../extractor/neo4j_search_data_extractor.py | 2 +- .../example/scripts/sample_data_loader.py | 34 ++++--------------- 2 files changed, 7 insertions(+), 29 deletions(-) diff --git a/databuilder/databuilder/extractor/neo4j_search_data_extractor.py b/databuilder/databuilder/extractor/neo4j_search_data_extractor.py index d8a323727b..70406f6ade 100644 --- a/databuilder/databuilder/extractor/neo4j_search_data_extractor.py +++ b/databuilder/databuilder/extractor/neo4j_search_data_extractor.py @@ -71,7 +71,7 @@ class Neo4jSearchDataExtractor(Extractor): return user.email as email, user.first_name as first_name, user.last_name as last_name, user.full_name as full_name, user.github_username as github_username, user.team_name as team_name, user.employee_type as employee_type, manager.email as manager_email, - user.slack_id as slack_id, user.is_active as is_active, + user.slack_id as slack_id, user.is_active as is_active, user.role_name as role_name, REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read, count(distinct b) as total_own, count(distinct c) AS total_follow diff --git a/databuilder/example/scripts/sample_data_loader.py b/databuilder/example/scripts/sample_data_loader.py index a83a13e979..7c17266fc2 100644 --- a/databuilder/example/scripts/sample_data_loader.py +++ b/databuilder/example/scripts/sample_data_loader.py @@ -21,7 +21,6 @@ import os import sqlite3 import sys -import textwrap import uuid from elasticsearch import Elasticsearch from pyhocon import ConfigFactory @@ -166,7 +165,7 @@ def create_last_updated_job(): def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index', elasticsearch_doc_type_key='table', model_name='databuilder.models.table_elasticsearch_document.TableESDocument', - cypher_query=None, + entity_type='table', elasticsearch_mapping=None): """ :param elasticsearch_index_alias: alias for Elasticsearch used in @@ -174,8 +173,8 @@ def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with. Defaults to `table` resulting in `table_search_index` :param model_name: the Databuilder model class used in transporting between Extractor and Loader - :param cypher_query: Query handed to the `Neo4jSearchDataExtractor` class, if None is given (default) - it uses the `Table` query baked into the Extractor + :param entity_type: Entity type handed to the `Neo4jSearchDataExtractor` class, used to determine + Cypher query to extract data from Neo4j. Defaults to `table`. :param elasticsearch_mapping: Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher` class, if None is given (default) it uses the `Table` query baked into the Publisher """ @@ -192,6 +191,7 @@ def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index elasticsearch_new_index_key = 'tables' + str(uuid.uuid4()) job_config = ConfigFactory.from_dict({ + 'extractor.search_data.entity_type': entity_type, 'extractor.search_data.extractor.neo4j.graph_url': neo4j_endpoint, 'extractor.search_data.extractor.neo4j.model_class': model_name, 'extractor.search_data.extractor.neo4j.neo4j_auth_user': neo4j_user, @@ -208,9 +208,6 @@ def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index }) # only optionally add these keys, so need to dynamically `put` them - if cypher_query: - job_config.put('extractor.search_data.{}'.format(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY), - cypher_query) if elasticsearch_mapping: job_config.put('publisher.elasticsearch.{}'.format(ElasticsearchPublisher.ELASTICSEARCH_MAPPING_CONFIG_KEY), elasticsearch_mapping) @@ -255,29 +252,10 @@ def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index job_es_table = create_es_publisher_sample_job( elasticsearch_index_alias='table_search_index', elasticsearch_doc_type_key='table', + entity_type='table', model_name='databuilder.models.table_elasticsearch_document.TableESDocument') job_es_table.launch() - user_cypher_query = textwrap.dedent( - """ - MATCH (user:User) - OPTIONAL MATCH (user)-[read:READ]->(a) - OPTIONAL MATCH (user)-[own:OWNER_OF]->(b) - OPTIONAL MATCH (user)-[follow:FOLLOWED_BY]->(c) - OPTIONAL MATCH (user)-[manage_by:MANAGE_BY]->(manager) - with user, a, b, c, read, own, follow, manager - where user.full_name is not null - return user.email as email, user.first_name as first_name, user.last_name as last_name, - user.full_name as full_name, user.github_username as github_username, user.team_name as team_name, - user.employee_type as employee_type, manager.email as manager_email, user.slack_id as slack_id, - user.role_name as role_name, user.is_active as is_active, - REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read, - count(distinct b) as total_own, - count(distinct c) AS total_follow - order by user.email - """ - ) - user_elasticsearch_mapping = """ { "mappings":{ @@ -338,6 +316,6 @@ def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index elasticsearch_index_alias='user_search_index', elasticsearch_doc_type_key='user', model_name='databuilder.models.user_elasticsearch_document.UserESDocument', - cypher_query=user_cypher_query, + entity_type='user', elasticsearch_mapping=user_elasticsearch_mapping) job_es_user.launch()