From 234d27a642218cfb2fdcb16f3f94d81cf3272bc5 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Mon, 27 Jun 2022 17:31:40 -0400 Subject: [PATCH 01/18] fix(ui): Support deleting references to glossary terms / nodes, users, assertions, and groups (#5248) * Adding referential integrity to deletes API * Updating comments * Fix build * fix checkstyle * Fixing Delete Entity utils Test --- .../assertion/DeleteAssertionResolver.java | 13 +++ .../DeleteGlossaryEntityResolver.java | 13 +++ .../resolvers/group/RemoveGroupResolver.java | 15 +++- .../resolvers/user/RemoveUserResolver.java | 13 +++ .../metadata/entity/DeleteEntityService.java | 80 ++++++++++++++----- .../metadata/entity/DeleteEntityUtils.java | 11 ++- .../entity/DeleteEntityUtilsTest.java | 17 ++-- .../entity/JavaEntityClientFactory.java | 6 ++ .../linkedin/entity/client/EntityClient.java | 6 ++ .../entity/client/JavaEntityClient.java | 8 ++ .../entity/client/RestliEntityClient.java | 11 +++ .../resources/entity/EntityResource.java | 8 +- 12 files changed, 165 insertions(+), 36 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java index c0730485a709e1..95bd9efe71cd8f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java @@ -14,14 +14,17 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; /** * GraphQL Resolver that 
deletes an Assertion. */ +@Slf4j public class DeleteAssertionResolver implements DataFetcher> { private final EntityClient _entityClient; @@ -46,6 +49,16 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (isAuthorizedToDeleteAssertion(context, assertionUrn)) { try { _entityClient.deleteEntity(assertionUrn, context.getAuthentication()); + + // Asynchronously Delete all references to the entity (to return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(assertionUrn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format("Caught exception while attempting to clear all entity references for assertion with urn %s", assertionUrn), e); + } + }); + return true; } catch (Exception e) { throw new RuntimeException(String.format("Failed to perform delete against assertion with urn %s", assertionUrn), e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java index 6bdb9d1ccd1f69..4aa5e6ff9a8d92 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java @@ -6,11 +6,14 @@ import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class DeleteGlossaryEntityResolver implements DataFetcher> { private final EntityClient _entityClient; @@ -34,6 +37,16 @@ 
public CompletableFuture get(final DataFetchingEnvironment environment) try { _entityClient.deleteEntity(entityUrn, context.getAuthentication()); + + // Asynchronously Delete all references to the entity (to return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(entityUrn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format("Caught exception while attempting to clear all entity references for glossary entity with urn %s", entityUrn), e); + } + }); + return true; } catch (Exception e) { throw new RuntimeException(String.format("Failed to perform delete against glossary entity with urn %s", entityUrn), e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java index 5cbee1defc038e..99b75ea9d90cd4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java @@ -5,13 +5,17 @@ import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; + /** * Resolver responsible for hard deleting a particular DataHub Corp Group */ +@Slf4j public class RemoveGroupResolver implements DataFetcher> { private final EntityClient _entityClient; @@ -28,8 +32,17 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn urn = Urn.createFromString(groupUrn); return CompletableFuture.supplyAsync(() -> { try { - 
// TODO: Remove all dangling references to this group. _entityClient.deleteEntity(urn, context.getAuthentication()); + + // Asynchronously Delete all references to the entity (to return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(urn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format("Caught exception while attempting to clear all entity references for group with urn %s", urn), e); + } + }); + return true; } catch (Exception e) { throw new RuntimeException(String.format("Failed to perform delete against group with urn %s", groupUrn), e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java index ecc183be0252c0..f77823d47ee9ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/RemoveUserResolver.java @@ -5,14 +5,17 @@ import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; /** * Resolver responsible for hard deleting a particular DataHub Corp User */ +@Slf4j public class RemoveUserResolver implements DataFetcher> { private final EntityClient _entityClient; @@ -30,6 +33,16 @@ public CompletableFuture get(final DataFetchingEnvironment environment) return CompletableFuture.supplyAsync(() -> { try { _entityClient.deleteEntity(urn, context.getAuthentication()); + + // Asynchronously Delete all references to the entity (to 
return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(urn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format("Caught exception while attempting to clear all entity references for user with urn %s", urn), e); + } + }); + return true; } catch (Exception e) { throw new RuntimeException(String.format("Failed to perform delete against user with urn %s", userUrn), e); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index ba8898bb65a01b..ceced5dd83ca94 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -220,29 +220,73 @@ private void deleteReference(final Urn urn, final RelatedEntity relatedEntity) { updatedAspect.get(), aspectSpec.getPegasusSchema(), path)); }); - // If there has been an update - if (!updatedAspect.get().equals(aspect)) { - final MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(relatedUrn); - proposal.setChangeType(ChangeType.UPSERT); - proposal.setEntityType(relatedUrn.getEntityType()); - proposal.setAspectName(aspectName); - proposal.setAspect(GenericRecordUtils.serializeAspect(updatedAspect.get())); - - final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); - final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp); - - if (!ingestProposalResult.isDidUpdate()) { - log.error("Failed to ingest aspect with references removed. 
Before {}, after: {}, please check MCP processor" - + " logs for more information", aspect, updatedAspect); - handleError(new DeleteEntityServiceError("Failed to ingest new aspect", - DeleteEntityServiceErrorReason.MCP_PROCESSOR_FAILED, - ImmutableMap.of("proposal", proposal))); + // If there has been an update, then we produce an MCE. + if (!aspect.equals(updatedAspect.get())) { + if (updatedAspect.get() == null) { + // Then we should remove the aspect. + deleteAspect(relatedUrn, aspectName, aspect); + } else { + // Then we should update the aspect. + updateAspect(relatedUrn, aspectName, aspect, updatedAspect.get()); } } }); } + /** + * Delete an existing aspect for an urn. + * + * @param urn the urn of the entity to remove the aspect for + * @param aspectName the aspect to remove + * @param prevAspect the old value for the aspect + */ + private void deleteAspect(Urn urn, String aspectName, RecordTemplate prevAspect) { + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(urn); + proposal.setChangeType(ChangeType.DELETE); + proposal.setEntityType(urn.getEntityType()); + proposal.setAspectName(aspectName); + + final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); + final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp); + + if (!ingestProposalResult.isDidUpdate()) { + log.error("Failed to ingest aspect with references removed. Before {}, after: null, please check MCP processor" + + " logs for more information", prevAspect); + handleError(new DeleteEntityServiceError("Failed to ingest new aspect", + DeleteEntityServiceErrorReason.MCP_PROCESSOR_FAILED, + ImmutableMap.of("proposal", proposal))); + } + } + + /** + * Update an aspect for an urn. 
+ * + * @param urn the urn of the entity to remove the aspect for + * @param aspectName the aspect to remove + * @param prevAspect the old value for the aspect + * @param newAspect the new value for the aspect + */ + private void updateAspect(Urn urn, String aspectName, RecordTemplate prevAspect, RecordTemplate newAspect) { + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(urn); + proposal.setChangeType(ChangeType.UPSERT); + proposal.setEntityType(urn.getEntityType()); + proposal.setAspectName(aspectName); + proposal.setAspect(GenericRecordUtils.serializeAspect(newAspect)); + + final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); + final EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp); + + if (!ingestProposalResult.isDidUpdate()) { + log.error("Failed to ingest aspect with references removed. Before {}, after: {}, please check MCP processor" + + " logs for more information", prevAspect, newAspect); + handleError(new DeleteEntityServiceError("Failed to ingest new aspect", + DeleteEntityServiceErrorReason.MCP_PROCESSOR_FAILED, + ImmutableMap.of("proposal", proposal))); + } + } + /** * Utility method that attempts to find Aspect information as well as the associated path spec for a given urn that diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java index 1036ddda5e9c86..58b5341c4ae0cd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java @@ -50,6 +50,10 @@ public static Aspect getAspectWithReferenceRemoved(String value, RecordTemplate try { final DataMap copy = aspect.copy().data(); final DataComplex newValue = removeValueBasedOnPath(value, schema, 
copy, aspectPath.getPathComponents(), 0); + if (newValue == null) { + // If the new value is null, we should remove the aspect. + return null; + } return new Aspect((DataMap) newValue); } catch (CloneNotSupportedException e) { return new Aspect(); @@ -105,7 +109,8 @@ private static DataComplex removeValueFromMap(String value, RecordDataSchema spe if (valueExistsInRecord) { if (canDelete) { record.remove(pathComponents.get(index)); - } else if (record.size() == 1) { + } else { + // If the field is required, then we need to remove the entire record (if possible) return null; } } else { @@ -126,6 +131,10 @@ private static DataComplex removeValueFromMap(String value, RecordDataSchema spe record.remove(key); } else if (record.size() == 1) { return null; + } else { + // Not optional and not the only field, then this is a bad delete. Need to throw. + throw new UnsupportedOperationException( + String.format("Delete failed! Failed to field with name %s from DataMap. The field is required!", key)); } } else { record.put(key, result); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java index 13accc02594100..67c9bd0a9e0147 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java @@ -73,18 +73,12 @@ public void testNonOptionalFieldRemoval() { + "}"); final DataSchema schema = pdlSchemaParser.lookupName("simple_record"); - final Aspect updatedAspect = DeleteEntityUtils.getAspectWithReferenceRemoved("hello", aspect, schema, - new PathSpec("key_a")); - - assertTrue(updatedAspect.data().containsKey("key_a")); - assertEquals("hello", updatedAspect.data().get("key_a")); - assertTrue(updatedAspect.data().containsKey("key_b")); - assertEquals("world", updatedAspect.data().get("key_b")); - assertEquals(aspect, updatedAspect); + 
assertNull(DeleteEntityUtils.getAspectWithReferenceRemoved("hello", aspect, schema, + new PathSpec("key_a"))); } /** - * Tests that Aspect Processor does not delete a non-optional value from a record referenced by another record. + * Tests that Aspect Processor deletes a required value from a record referenced by another record. */ @Test public void testNestedFieldRemoval() { @@ -98,15 +92,14 @@ public void testNestedFieldRemoval() { + "}"); pdlSchemaParser.parse("record complex_record {\n" - + "key_c: simple_record\n" + + "key_c: optional simple_record\n" + "}"); final DataSchema schema = pdlSchemaParser.lookupName("complex_record"); final Aspect updatedAspect = DeleteEntityUtils.getAspectWithReferenceRemoved("hello", aspect, schema, new PathSpec("key_c", "key_a")); - assertTrue(updatedAspect.data().containsKey("key_c")); - assertEquals(aspect.data().get("key_c"), updatedAspect.data().get("key_c")); + assertFalse(updatedAspect.data().containsKey("key_c")); } /** diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java index 75e2fa334e563e..7e185dedc94bf7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java @@ -2,6 +2,7 @@ import com.linkedin.entity.client.JavaEntityClient; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; +import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.search.EntitySearchService; @@ -22,6 +23,10 @@ public class JavaEntityClientFactory { @Qualifier("entityService") private EntityService _entityService; + @Autowired + @Qualifier("deleteEntityService") + private 
DeleteEntityService _deleteEntityService; + @Autowired @Qualifier("searchService") private SearchService _searchService; @@ -46,6 +51,7 @@ public class JavaEntityClientFactory { public JavaEntityClient getJavaEntityClient() { return new JavaEntityClient( _entityService, + _deleteEntityService, _eventProducer, _entitySearchService, _searchService, diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index af895a2bcf3e89..734bf2d6aadae7 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -229,6 +229,12 @@ public ListUrnsResult listUrns(@Nonnull final String entityName, final int start public void deleteEntity(@Nonnull final Urn urn, @Nonnull final Authentication authentication) throws RemoteInvocationException; + /** + * Delete all references to an entity with a particular urn. 
+ */ + public void deleteEntityReferences(@Nonnull final Urn urn, @Nonnull final Authentication authentication) + throws RemoteInvocationException; + /** * Filters entities based on a particular Filter and Sort criterion * diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java index 6bfbd617cb8e22..d3587820a79359 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.browse.BrowseResult; +import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.graph.LineageDirection; @@ -63,6 +64,7 @@ public class JavaEntityClient implements EntityClient { private final Clock _clock = Clock.systemUTC(); private final EntityService _entityService; + private final DeleteEntityService _deleteEntityService; private final EventProducer _eventProducer; private final EntitySearchService _entitySearchService; private final SearchService _searchService; @@ -350,6 +352,12 @@ public void deleteEntity(@Nonnull final Urn urn, @Nonnull final Authentication a _entityService.deleteUrn(urn); } + @Override + public void deleteEntityReferences(@Nonnull Urn urn, @Nonnull Authentication authentication) + throws RemoteInvocationException { + _deleteEntityService.deleteReferencesTo(urn, false); + } + @Nonnull @Override public SearchResult filter(@Nonnull String entity, @Nonnull Filter filter, @Nullable SortCriterion sortCriterion, diff --git 
a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 695be8593ccaca..3f301a020c274b 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -18,6 +18,7 @@ import com.linkedin.entity.EntitiesDoBatchGetTotalEntityCountRequestBuilder; import com.linkedin.entity.EntitiesDoBatchIngestRequestBuilder; import com.linkedin.entity.EntitiesDoBrowseRequestBuilder; +import com.linkedin.entity.EntitiesDoDeleteReferencesRequestBuilder; import com.linkedin.entity.EntitiesDoDeleteRequestBuilder; import com.linkedin.entity.EntitiesDoFilterRequestBuilder; import com.linkedin.entity.EntitiesDoGetBrowsePathsRequestBuilder; @@ -487,6 +488,16 @@ public void deleteEntity(@Nonnull final Urn urn, @Nonnull final Authentication a sendClientRequest(requestBuilder, authentication); } + /** + * Delete all references to a particular entity. 
+ */ + @Override + public void deleteEntityReferences(@Nonnull Urn urn, @Nonnull Authentication authentication) + throws RemoteInvocationException { + EntitiesDoDeleteReferencesRequestBuilder requestBuilder = ENTITIES_REQUEST_BUILDERS.actionDeleteReferences().urnParam(urn.toString()); + sendClientRequest(requestBuilder, authentication); + } + @Nonnull @Override public SearchResult filter(@Nonnull String entity, @Nonnull Filter filter, @Nullable SortCriterion sortCriterion, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 2c6489e322b3ae..ea6dc4f76551a6 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -96,6 +96,10 @@ public class EntityResource extends CollectionResourceTaskTemplate Date: Mon, 27 Jun 2022 23:56:50 +0100 Subject: [PATCH 02/18] feat(docs) add links in quickstart for adding users (#5267) --- docs/quickstart.md | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index 06de8c96cce457..a7779c2b924532 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -30,16 +30,23 @@ To deploy a new instance of DataHub, perform the following steps. ::: -4. To deploy DataHub, run the following CLI command from your terminal +4. To deploy a DataHub instance locally, run the following CLI command from your terminal ``` datahub docker quickstart ``` + This will deploy a DataHub instance using [docker-compose](https://docs.docker.com/compose/). + Upon completion of this step, you should be able to navigate to the DataHub UI at [http://localhost:9002](http://localhost:9002) in your browser. 
You can sign in using `datahub` as both the username and password. + If you would like to modify/configure the DataHub installation in some way, please download the [docker-compose.yaml](https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml) used by the cli tool, modify it as necessary and deploy DataHub by passing the downloaded docker-compose file: + ``` + datahub docker quickstart --quickstart-compose-file + ``` + 5. To ingest the sample metadata, run the following CLI command from your terminal ``` @@ -53,8 +60,27 @@ using the `--token ` parameter in the command. ::: -That's it! To start pushing your company's metadata into DataHub, take a look at -the [Metadata Ingestion Framework](../metadata-ingestion/README.md). +That's it! Now feel free to play around with DataHub! + +## Next Steps + +### Ingest Metadata + +To start pushing your company's metadata into DataHub, take a look at the [Metadata Ingestion Framework](../metadata-ingestion/README.md). + +### Invite Users + +To add users to your deployment to share with your team check out our [Adding Users to DataHub](./how/auth/add-users.md) + +### Enable Authentication + +To enable SSO, check out [Configuring OIDC Authentication](./how/auth/sso/configure-oidc-react.md) or [Configuring JaaS Authentication](./how/auth/jaas.md). + +To enable backend Authentication, check out [authentication in DataHub's backend](./introducing-metadata-service-authentication.md#Configuring Metadata Service Authentication). + +### Move to Production + +We recommend deploying DataHub to production using Kubernetes. We provide helpful [Helm Charts](https://artifacthub.io/packages/helm/datahub/datahub) to help you quickly get up and running. Check out [Deploying DataHub to Kubernetes](./deploy/kubernetes.md) for a step-by-step walkthrough. 
## Resetting DataHub From 9fd314f92f628cfc9a17b1af8afb237089cd615e Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 27 Jun 2022 19:04:12 -0400 Subject: [PATCH 03/18] fix(siblings) Display sibling assertions in Validations tab (#5268) * fix(siblings) Display sibling assertions in Validations tab * query changes Co-authored-by: Chris Collins --- .../src/app/entity/shared/siblingUtils.ts | 2 +- .../Dataset/Validations/ValidationsTab.tsx | 21 +++++----- datahub-web-react/src/graphql/dataset.graphql | 40 ++++++++++++------- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/siblingUtils.ts b/datahub-web-react/src/app/entity/shared/siblingUtils.ts index 7a0eb268b554cb..93c08022f47a45 100644 --- a/datahub-web-react/src/app/entity/shared/siblingUtils.ts +++ b/datahub-web-react/src/app/entity/shared/siblingUtils.ts @@ -48,7 +48,7 @@ const customMerge = (isPrimary, key) => { if (key === 'platform') { return (a, b) => (isPrimary ? b : a); } - if (key === 'tags' || key === 'terms') { + if (key === 'tags' || key === 'terms' || key === 'assertions') { return (a, b) => { return merge(a, b, { customMerge: customMerge.bind({}, isPrimary), diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx index 34bae76dd8f8f2..8b747c2a776804 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx @@ -9,6 +9,7 @@ import { DatasetAssertionsList } from './DatasetAssertionsList'; import { DatasetAssertionsSummary } from './DatasetAssertionsSummary'; import { sortAssertions } from './assertionUtils'; import { TestResults } from './TestResults'; +import { combineEntityDataWithSiblings } from '../../../siblingUtils'; /** * Returns a status summary for the assertions associated with a 
Dataset. @@ -47,28 +48,30 @@ enum ViewType { export const ValidationsTab = () => { const { urn, entityData } = useEntityData(); const { data, refetch } = useGetDatasetAssertionsQuery({ variables: { urn } }); + const combinedData = combineEntityDataWithSiblings(data); const [removedUrns, setRemovedUrns] = useState([]); /** * Determines which view should be visible: assertions or tests. */ const [view, setView] = useState(ViewType.ASSERTIONS); - const assertions = (data && data.dataset?.assertions?.assertions?.map((assertion) => assertion as Assertion)) || []; - const maybeTotalAssertions = data?.dataset?.assertions?.total || undefined; - const effectiveTotalAssertions = maybeTotalAssertions || 0; + const assertions = + (combinedData && combinedData.dataset?.assertions?.assertions?.map((assertion) => assertion as Assertion)) || + []; const filteredAssertions = assertions.filter((assertion) => !removedUrns.includes(assertion.urn)); + const numAssertions = filteredAssertions.length; const passingTests = (entityData as any)?.testResults?.passing || []; const maybeFailingTests = (entityData as any)?.testResults?.failing || []; const totalTests = maybeFailingTests.length + passingTests.length; useEffect(() => { - if (totalTests > 0 && maybeTotalAssertions === 0) { + if (totalTests > 0 && numAssertions === 0) { setView(ViewType.TESTS); } else { setView(ViewType.ASSERTIONS); } - }, [totalTests, maybeTotalAssertions]); + }, [totalTests, numAssertions]); // Pre-sort the list of assertions based on which has been most recently executed. assertions.sort(sortAssertions); @@ -77,13 +80,9 @@ export const ValidationsTab = () => { <>
-
); }; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx index 997dccec6c8c16..aa1128646801ac 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { Link } from 'react-router-dom'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import { message, Button, List, Typography } from 'antd'; import { LinkOutlined, DeleteOutlined } from '@ant-design/icons'; import { EntityType } from '../../../../../../types.generated'; diff --git a/datahub-web-react/src/app/entity/shared/types.ts b/datahub-web-react/src/app/entity/shared/types.ts index c03efc5dfb1a78..cf94fee7dfdd26 100644 --- a/datahub-web-react/src/app/entity/shared/types.ts +++ b/datahub-web-react/src/app/entity/shared/types.ts @@ -57,6 +57,8 @@ export type GenericEntityProperties = { properties?: Maybe<{ description?: Maybe; qualifiedName?: Maybe; + sourceUrl?: Maybe; + sourceRef?: Maybe; }>; globalTags?: Maybe; glossaryTerms?: Maybe; From d62c77f70abc284e9256500b034bd32a02945c9c Mon Sep 17 00:00:00 2001 From: Mugdha Hardikar Date: Tue, 28 Jun 2022 15:48:09 +0530 Subject: [PATCH 09/18] fix(ingest): delta-lake - fix dependency issue for snowflake due to s3_util (#5274) --- .../ingestion/source/aws/s3_boto_utils.py | 101 ++++++++++++++++++ .../ingestion/source/delta_lake/source.py | 3 +- .../src/datahub/ingestion/source/s3/source.py | 3 +- 3 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py new file mode 100644 index 
00000000000000..caec19d0fb2492 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_boto_utils.py @@ -0,0 +1,101 @@ +import logging +from typing import Iterable, Optional + +from datahub.emitter.mce_builder import make_tag_urn +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.aws.aws_common import AwsSourceConfig +from datahub.ingestion.source.aws.s3_util import ( + get_bucket_name, + get_bucket_relative_path, + is_s3_uri, +) +from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass + +logging.getLogger("py4j").setLevel(logging.ERROR) +logger: logging.Logger = logging.getLogger(__name__) + + +def get_s3_tags( + bucket_name: str, + key_name: Optional[str], + dataset_urn: str, + aws_config: Optional[AwsSourceConfig], + ctx: PipelineContext, + use_s3_bucket_tags: Optional[bool] = False, + use_s3_object_tags: Optional[bool] = False, +) -> Optional[GlobalTagsClass]: + if aws_config is None: + raise ValueError("aws_config not set. Cannot browse s3") + new_tags = GlobalTagsClass(tags=[]) + tags_to_add = [] + if use_s3_bucket_tags: + s3 = aws_config.get_s3_resource() + bucket = s3.Bucket(bucket_name) + try: + tags_to_add.extend( + [ + make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") + for tag in bucket.Tagging().tag_set + ] + ) + except s3.meta.client.exceptions.ClientError: + logger.warn(f"No tags found for bucket={bucket_name}") + + if use_s3_object_tags and key_name is not None: + s3_client = aws_config.get_s3_client() + object_tagging = s3_client.get_object_tagging(Bucket=bucket_name, Key=key_name) + tag_set = object_tagging["TagSet"] + if tag_set: + tags_to_add.extend( + [make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") for tag in tag_set] + ) + else: + # Unlike bucket tags, if an object does not have tags, it will just return an empty array + # as opposed to an exception. 
+ logger.warn(f"No tags found for bucket={bucket_name} key={key_name}") + if len(tags_to_add) == 0: + return None + if ctx.graph is not None: + logger.debug("Connected to DatahubApi, grabbing current tags to maintain.") + current_tags: Optional[GlobalTagsClass] = ctx.graph.get_aspect_v2( + entity_urn=dataset_urn, + aspect="globalTags", + aspect_type=GlobalTagsClass, + ) + if current_tags: + tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags]) + else: + logger.warn("Could not connect to DatahubApi. No current tags to maintain") + # Remove duplicate tags + tags_to_add = list(set(tags_to_add)) + new_tags = GlobalTagsClass( + tags=[TagAssociationClass(tag_to_add) for tag_to_add in tags_to_add] + ) + return new_tags + + +def list_folders_path( + s3_uri: str, aws_config: Optional[AwsSourceConfig] +) -> Iterable[str]: + if not is_s3_uri(s3_uri): + raise ValueError("Not a s3 URI: " + s3_uri) + if aws_config is None: + raise ValueError("aws_config not set. Cannot browse s3") + bucket_name = get_bucket_name(s3_uri) + prefix = get_bucket_relative_path(s3_uri) + yield from list_folders(bucket_name, prefix, aws_config) + + +def list_folders( + bucket_name: str, prefix: str, aws_config: Optional[AwsSourceConfig] +) -> Iterable[str]: + if aws_config is None: + raise ValueError("aws_config not set. 
Cannot browse s3") + s3_client = aws_config.get_s3_client() + paginator = s3_client.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter="/"): + for o in page.get("CommonPrefixes", []): + folder: str = str(o.get("Prefix")) + if folder.endswith("/"): + folder = folder[:-1] + yield f"{folder}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index c292c16764ec3a..42837ac833b09b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -19,11 +19,10 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.aws.s3_boto_utils import get_s3_tags, list_folders_path from datahub.ingestion.source.aws.s3_util import ( get_bucket_name, get_key_prefix, - get_s3_tags, - list_folders_path, strip_s3_prefix, ) from datahub.ingestion.source.data_lake.data_lake_utils import ContainerWUCreator diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 4e1dda628f1dad..6e66dcc3d84167 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -51,12 +51,11 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.aws.s3_boto_utils import get_s3_tags, list_folders from datahub.ingestion.source.aws.s3_util import ( get_bucket_name, get_bucket_relative_path, get_key_prefix, - get_s3_tags, - list_folders, strip_s3_prefix, ) from datahub.ingestion.source.data_lake.data_lake_utils import ContainerWUCreator From b76005d6407be1ece43948d429c3e04ad067a63b Mon Sep 17 00:00:00 
2001 From: Mugdha Hardikar Date: Tue, 28 Jun 2022 20:20:41 +0530 Subject: [PATCH 10/18] fix(ingest): s3 - Remove unneeded methods from s3_util (#5276) --- .../datahub/ingestion/source/aws/s3_util.py | 92 ------------------- 1 file changed, 92 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py index 0acde18d86e0a8..a09b21a4c8da7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py @@ -1,11 +1,5 @@ import logging import os -from typing import Iterable, Optional - -from datahub.emitter.mce_builder import make_tag_urn -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.source.aws.aws_common import AwsSourceConfig -from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass S3_PREFIXES = ["s3://", "s3n://", "s3a://"] @@ -13,92 +7,6 @@ logger: logging.Logger = logging.getLogger(__name__) -def get_s3_tags( - bucket_name: str, - key_name: Optional[str], - dataset_urn: str, - aws_config: Optional[AwsSourceConfig], - ctx: PipelineContext, - use_s3_bucket_tags: Optional[bool] = False, - use_s3_object_tags: Optional[bool] = False, -) -> Optional[GlobalTagsClass]: - if aws_config is None: - raise ValueError("aws_config not set. 
Cannot browse s3") - new_tags = GlobalTagsClass(tags=[]) - tags_to_add = [] - if use_s3_bucket_tags: - s3 = aws_config.get_s3_resource() - bucket = s3.Bucket(bucket_name) - try: - tags_to_add.extend( - [ - make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") - for tag in bucket.Tagging().tag_set - ] - ) - except s3.meta.client.exceptions.ClientError: - logger.warn(f"No tags found for bucket={bucket_name}") - - if use_s3_object_tags and key_name is not None: - s3_client = aws_config.get_s3_client() - object_tagging = s3_client.get_object_tagging(Bucket=bucket_name, Key=key_name) - tag_set = object_tagging["TagSet"] - if tag_set: - tags_to_add.extend( - [make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") for tag in tag_set] - ) - else: - # Unlike bucket tags, if an object does not have tags, it will just return an empty array - # as opposed to an exception. - logger.warn(f"No tags found for bucket={bucket_name} key={key_name}") - if len(tags_to_add) == 0: - return None - if ctx.graph is not None: - logger.debug("Connected to DatahubApi, grabbing current tags to maintain.") - current_tags: Optional[GlobalTagsClass] = ctx.graph.get_aspect_v2( - entity_urn=dataset_urn, - aspect="globalTags", - aspect_type=GlobalTagsClass, - ) - if current_tags: - tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags]) - else: - logger.warn("Could not connect to DatahubApi. No current tags to maintain") - # Remove duplicate tags - tags_to_add = list(set(tags_to_add)) - new_tags = GlobalTagsClass( - tags=[TagAssociationClass(tag_to_add) for tag_to_add in tags_to_add] - ) - return new_tags - - -def list_folders_path( - s3_uri: str, aws_config: Optional[AwsSourceConfig] -) -> Iterable[str]: - if not is_s3_uri(s3_uri): - raise ValueError("Not a s3 URI: " + s3_uri) - if aws_config is None: - raise ValueError("aws_config not set. 
Cannot browse s3") - bucket_name = get_bucket_name(s3_uri) - prefix = get_bucket_relative_path(s3_uri) - yield from list_folders(bucket_name, prefix, aws_config) - - -def list_folders( - bucket_name: str, prefix: str, aws_config: Optional[AwsSourceConfig] -) -> Iterable[str]: - if aws_config is None: - raise ValueError("aws_config not set. Cannot browse s3") - s3_client = aws_config.get_s3_client() - paginator = s3_client.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter="/"): - for o in page.get("CommonPrefixes", []): - folder: str = str(o.get("Prefix")) - if folder.endswith("/"): - folder = folder[:-1] - yield f"{folder}" - - def is_s3_uri(uri: str) -> bool: return any(uri.startswith(prefix) for prefix in S3_PREFIXES) From bf7da0a853e8459e71181883cc7cd57e69db1dc8 Mon Sep 17 00:00:00 2001 From: Ankit keshari <86347578+Ankit-Keshari-Vituity@users.noreply.github.com> Date: Tue, 28 Jun 2022 22:15:25 +0530 Subject: [PATCH 11/18] feat(ui): Selector recommendations in Owner, Tag and Domain Modal (#5197) --- .../entity/group/GroupOwnerSideBarSection.tsx | 21 +- .../profile/sidebar/Domain/SetDomainModal.tsx | 191 ++++++++------- .../sidebar/Domain/SidebarDomainSection.tsx | 15 +- .../sidebar/Ownership/AddOwnersModal.tsx | 226 ++++++++---------- .../sidebar/Ownership/SidebarOwnerSection.tsx | 23 +- .../src/app/shared/DomainLabel.tsx | 29 +++ .../src/app/shared/OwnerLabel.tsx | 34 +++ .../src/app/shared/TagStyleEntity.tsx | 21 +- .../src/app/shared/recommendation.tsx | 18 ++ .../src/app/shared/tags/AddTagsTermsModal.tsx | 96 +++++--- 10 files changed, 394 insertions(+), 280 deletions(-) create mode 100644 datahub-web-react/src/app/shared/DomainLabel.tsx create mode 100644 datahub-web-react/src/app/shared/OwnerLabel.tsx create mode 100644 datahub-web-react/src/app/shared/recommendation.tsx diff --git a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx 
b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx index 1708d6f5aa62b5..2cf84bc375c0cb 100644 --- a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx +++ b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx @@ -50,16 +50,17 @@ export default function GroupOwnerSideBarSection({ urn, ownership, refetch }: Pr )} - { - setShowAddModal(false); - }} - /> + {showAddModal && ( + { + setShowAddModal(false); + }} + /> + )} ); } diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SetDomainModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SetDomainModal.tsx index d0db0fe5be4406..afd634326cd8ba 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SetDomainModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SetDomainModal.tsx @@ -1,82 +1,89 @@ -import { Button, Form, message, Modal, Select, Tag } from 'antd'; import React, { useRef, useState } from 'react'; +import { Button, Form, message, Modal, Select, Tag } from 'antd'; import styled from 'styled-components'; -import { Link } from 'react-router-dom'; + import { useGetSearchResultsLazyQuery } from '../../../../../../../graphql/search.generated'; -import { EntityType, SearchResult } from '../../../../../../../types.generated'; +import { Entity, EntityType } from '../../../../../../../types.generated'; import { useSetDomainMutation } from '../../../../../../../graphql/mutations.generated'; import { useEntityRegistry } from '../../../../../../useEntityRegistry'; import { useEntityData } from '../../../../EntityContext'; import { useEnterKeyListener } from '../../../../../../shared/useEnterKeyListener'; +import { useGetRecommendations } from '../../../../../../shared/recommendation'; +import { DomainLabel } from '../../../../../../shared/DomainLabel'; type Props = { - visible: boolean; - onClose: () => void; + onCloseModal: () => void; 
refetch?: () => Promise; }; -const SearchResultContainer = styled.div` - display: flex; - justify-content: space-between; - align-items: center; - padding: 12px; -`; - -const SearchResultContent = styled.div` - display: flex; - justify-content: start; - align-items: center; -`; - -const SearchResultDisplayName = styled.div` - margin-left: 12px; -`; - type SelectedDomain = { displayName: string; type: EntityType; urn: string; }; -export const SetDomainModal = ({ visible, onClose, refetch }: Props) => { +const StyleTag = styled(Tag)` + padding: 0px 7px; + margin-right: 3px; + display: flex; + justify-content: start; + align-items: center; +`; + +export const SetDomainModal = ({ onCloseModal, refetch }: Props) => { const entityRegistry = useEntityRegistry(); const { urn } = useEntityData(); + const [inputValue, setInputValue] = useState(''); const [selectedDomain, setSelectedDomain] = useState(undefined); const [domainSearch, { data: domainSearchData }] = useGetSearchResultsLazyQuery(); - const domainSearchResults = domainSearchData?.search?.searchResults || []; + const domainSearchResults = + domainSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; const [setDomainMutation] = useSetDomainMutation(); - + const [recommendedData] = useGetRecommendations([EntityType.Domain]); const inputEl = useRef(null); - const onOk = async () => { - if (!selectedDomain) { - return; - } - try { - await setDomainMutation({ + const onModalClose = () => { + setInputValue(''); + setSelectedDomain(undefined); + onCloseModal(); + }; + + const handleSearch = (text: string) => { + if (text.length > 2) { + domainSearch({ variables: { - entityUrn: urn, - domainUrn: selectedDomain.urn, + input: { + type: EntityType.Domain, + query: text, + start: 0, + count: 5, + }, }, }); - message.success({ content: 'Updated Domain!', duration: 2 }); - } catch (e: unknown) { - message.destroy(); - if (e instanceof Error) { - message.error({ content: `Failed to set Domain: \n 
${e.message || ''}`, duration: 3 }); - } } - setSelectedDomain(undefined); - refetch?.(); - onClose(); }; + // Renders a search result in the select dropdown. + const renderSearchResult = (entity: Entity) => { + const displayName = entityRegistry.getDisplayName(entity.type, entity); + return ( + + + + ); + }; + + const domainResult = !inputValue || inputValue.length === 0 ? recommendedData : domainSearchResults; + + const domainSearchOptions = domainResult?.map((result) => { + return renderSearchResult(result); + }); + const onSelectDomain = (newUrn: string) => { if (inputEl && inputEl.current) { (inputEl.current as any).blur(); } - const filteredDomains = - domainSearchResults?.filter((result) => result.entity.urn === newUrn).map((result) => result.entity) || []; + const filteredDomains = domainResult?.filter((entity) => entity.urn === newUrn).map((entity) => entity) || []; if (filteredDomains.length) { const domain = filteredDomains[0]; setSelectedDomain({ @@ -87,56 +94,67 @@ export const SetDomainModal = ({ visible, onClose, refetch }: Props) => { } }; - const handleSearch = (text: string) => { - if (text.length > 2) { - domainSearch({ + const onDeselectDomain = () => { + setInputValue(''); + setSelectedDomain(undefined); + }; + + const onOk = async () => { + if (!selectedDomain) { + return; + } + try { + await setDomainMutation({ variables: { - input: { - type: EntityType.Domain, - query: text, - start: 0, - count: 5, - }, + entityUrn: urn, + domainUrn: selectedDomain.urn, }, }); + message.success({ content: 'Updated Domain!', duration: 2 }); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) { + message.error({ content: `Failed to set Domain: \n ${e.message || ''}`, duration: 3 }); + } } + setSelectedDomain(undefined); + refetch?.(); + onModalClose(); }; + const selectValue = (selectedDomain && [selectedDomain?.displayName]) || undefined; + // Handle the Enter press useEnterKeyListener({ querySelectorToExecuteClick: '#setDomainButton', }); 
- const renderSearchResult = (result: SearchResult) => { - const displayName = entityRegistry.getDisplayName(result.entity.type, result.entity); + const tagRender = (props) => { + // eslint-disable-next-line react/prop-types + const { label, closable, onClose } = props; + const onPreventMouseDown = (event) => { + event.preventDefault(); + event.stopPropagation(); + }; return ( - - - -
{displayName}
-
-
- `/${entityRegistry.getPathName(result.entity.type)}/${result.entity.urn}`} - > - View - {' '} -
+ + {label} + ); }; - const selectValue = (selectedDomain && [selectedDomain?.displayName]) || []; + function handleBlur() { + setInputValue(''); + } return ( - - { - setShowAddModal(false); - }} - /> + {showAddModal && ( + { + setShowAddModal(false); + }} + /> + )} ); }; diff --git a/datahub-web-react/src/app/shared/DomainLabel.tsx b/datahub-web-react/src/app/shared/DomainLabel.tsx new file mode 100644 index 00000000000000..40208026d4369f --- /dev/null +++ b/datahub-web-react/src/app/shared/DomainLabel.tsx @@ -0,0 +1,29 @@ +import React from 'react'; +import styled from 'styled-components'; + +const DomainContainerWrapper = styled.div` + display: flex; + justify-content: space-between; + align-items: center; + padding: 12px; +`; + +const DomainContentWrapper = styled.div` + display: flex; + justify-content: center; + align-items: center; +`; + +type Props = { + name: string; +}; + +export const DomainLabel = ({ name }: Props) => { + return ( + + +
{name}
+
+
+ ); +}; diff --git a/datahub-web-react/src/app/shared/OwnerLabel.tsx b/datahub-web-react/src/app/shared/OwnerLabel.tsx new file mode 100644 index 00000000000000..de3c03dea2ba4a --- /dev/null +++ b/datahub-web-react/src/app/shared/OwnerLabel.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import styled from 'styled-components'; +import { EntityType } from '../../types.generated'; +import { CustomAvatar } from './avatar'; + +const OwnerContainerWrapper = styled.div` + display: flex; + justify-content: space-between; + align-items: center; + padding: 2px; +`; + +const OwnerContentWrapper = styled.div` + display: flex; + justify-content: center; + align-items: center; +`; + +type Props = { + name: string; + avatarUrl: string | undefined; + type: EntityType; +}; + +export const OwnerLabel = ({ name, avatarUrl, type }: Props) => { + return ( + + + +
{name}
+
+
+ ); +}; diff --git a/datahub-web-react/src/app/shared/TagStyleEntity.tsx b/datahub-web-react/src/app/shared/TagStyleEntity.tsx index 23460ccef8dc9a..7a246d9ed839e6 100644 --- a/datahub-web-react/src/app/shared/TagStyleEntity.tsx +++ b/datahub-web-react/src/app/shared/TagStyleEntity.tsx @@ -414,16 +414,17 @@ export default function TagStyleEntity({ urn, useGetSearchResults = useWrappedSe
- { - setShowAddModal(false); - }} - urn={urn} - type={EntityType.Tag} - /> + {showAddModal && ( + { + setShowAddModal(false); + }} + urn={urn} + type={EntityType.Tag} + /> + )}
diff --git a/datahub-web-react/src/app/shared/recommendation.tsx b/datahub-web-react/src/app/shared/recommendation.tsx new file mode 100644 index 00000000000000..19a30877961526 --- /dev/null +++ b/datahub-web-react/src/app/shared/recommendation.tsx @@ -0,0 +1,18 @@ +import { useGetSearchResultsForMultipleQuery } from '../../graphql/search.generated'; +import { EntityType } from '../../types.generated'; + +export const useGetRecommendations = (types: Array) => { + const { data } = useGetSearchResultsForMultipleQuery({ + variables: { + input: { + types, + query: '*', + start: 0, + count: 5, + }, + }, + }); + + const recommendedData = data?.searchAcrossEntities?.searchResults?.map((searchResult) => searchResult.entity) || []; + return [recommendedData]; +}; diff --git a/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx b/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx index 00479f3a0514e9..9101ad34029d0f 100644 --- a/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx +++ b/datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx @@ -1,11 +1,10 @@ -import React, { useState } from 'react'; +import React, { useRef, useState } from 'react'; import { message, Button, Modal, Select, Typography, Tag as CustomTag } from 'antd'; import styled from 'styled-components'; import { useGetSearchResultsLazyQuery } from '../../../graphql/search.generated'; -import { EntityType, SubResourceType, SearchResult, Tag } from '../../../types.generated'; +import { EntityType, SubResourceType, Tag, Entity } from '../../../types.generated'; import CreateTagModal from './CreateTagModal'; -import { useEntityRegistry } from '../../useEntityRegistry'; import { useAddTagsMutation, useAddTermsMutation } from '../../../graphql/mutations.generated'; import analytics, { EventType, EntityActionType } from '../../analytics'; import { useEnterKeyListener } from '../useEnterKeyListener'; @@ -13,6 +12,8 @@ import TermLabel from '../TermLabel'; import TagLabel from 
'../TagLabel'; import GlossaryBrowser from '../../glossary/GlossaryBrowser/GlossaryBrowser'; import ClickOutside from '../ClickOutside'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { useGetRecommendations } from '../recommendation'; type AddTagsModalProps = { visible: boolean; @@ -27,6 +28,17 @@ const TagSelect = styled(Select)` width: 480px; `; +const StyleTag = styled(CustomTag)` + margin-right: 3px; + display: flex; + justify-content: start; + align-items: center; + white-space: nowrap; + opacity: 1; + color: #434343; + line-height: 16px; +`; + export const BrowserWrapper = styled.div<{ isHidden: boolean }>` background-color: white; border-radius: 5px; @@ -61,13 +73,16 @@ export default function AddTagsTermsModal({ const [disableAdd, setDisableAdd] = useState(false); const [urns, setUrns] = useState([]); const [selectedTerms, setSelectedTerms] = useState([]); + const [selectedTags, setSelectedTags] = useState([]); const [isFocusedOnInput, setIsFocusedOnInput] = useState(false); const [addTagsMutation] = useAddTagsMutation(); const [addTermsMutation] = useAddTermsMutation(); const [tagTermSearch, { data: tagTermSearchData }] = useGetSearchResultsLazyQuery(); - const tagSearchResults = tagTermSearchData?.search?.searchResults || []; + const tagSearchResults = tagTermSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; + const [recommendedData] = useGetRecommendations([EntityType.Tag]); + const inputEl = useRef(null); const handleSearch = (text: string) => { if (text.length > 0) { @@ -84,39 +99,40 @@ export default function AddTagsTermsModal({ } }; - const renderSearchResult = (result: SearchResult) => { + const renderSearchResult = (entity: Entity) => { const displayName = - result.entity.type === EntityType.Tag - ? (result.entity as Tag).name - : entityRegistry.getDisplayName(result.entity.type, result.entity); + entity.type === EntityType.Tag ? 
(entity as Tag).name : entityRegistry.getDisplayName(entity.type, entity); const tagOrTermComponent = - result.entity.type === EntityType.Tag ? ( + entity.type === EntityType.Tag ? ( ) : ( ); return ( - + {tagOrTermComponent} ); }; - const tagSearchOptions = tagSearchResults.map((result) => { + const tagResult = + (!inputValue || inputValue.length === 0) && type === EntityType.Tag ? recommendedData : tagSearchResults; + + const tagSearchOptions = tagResult?.map((result) => { return renderSearchResult(result); }); - const inputExistsInTagSearch = tagSearchResults.some((result: SearchResult) => { - const displayName = entityRegistry.getDisplayName(result.entity.type, result.entity); + const inputExistsInTagSearch = tagSearchResults.some((entity: Entity) => { + const displayName = entityRegistry.getDisplayName(entity.type, entity); return displayName.toLowerCase() === inputValue.toLowerCase(); }); if (!inputExistsInTagSearch && inputValue.length > 0 && type === EntityType.Tag && urns.length === 0) { - tagSearchOptions.push( + tagSearchOptions?.push( Create {inputValue} , @@ -125,32 +141,20 @@ export default function AddTagsTermsModal({ const tagRender = (props) => { // eslint-disable-next-line react/prop-types - const { label, closable, onClose, value } = props; + const { closable, onClose, value } = props; const onPreventMouseDown = (event) => { event.preventDefault(); event.stopPropagation(); }; const selectedItem = - type === EntityType.GlossaryTerm ? selectedTerms.find((term) => term.urn === value).component : label; + type === EntityType.GlossaryTerm + ? 
selectedTerms.find((term) => term.urn === value).component + : selectedTags.find((term) => term.urn === value).component; return ( - + {selectedItem} - + ); }; @@ -179,9 +183,26 @@ export default function AddTagsTermsModal({ return; } const newUrns = [...(urns || []), urn]; + const selectedSearchOption = tagSearchOptions?.find((option) => option.props.value === urn); + const selectedTagOption = tagResult?.find((tag) => tag.urn === urn); setUrns(newUrns); - const selectedSearchOption = tagSearchOptions.find((option) => option.props.value === urn); setSelectedTerms([...selectedTerms, { urn, component: }]); + setSelectedTags([ + ...selectedTags, + { + urn, + component: ( + + ), + }, + ]); + if (inputEl && inputEl.current) { + (inputEl.current as any).blur(); + } }; // When a Tag or term search result is deselected, remove the urn from the Owners @@ -191,6 +212,7 @@ export default function AddTagsTermsModal({ setInputValue(''); setIsFocusedOnInput(true); setSelectedTerms(selectedTerms.filter((term) => term.urn !== urn)); + setSelectedTags(selectedTags.filter((term) => term.urn !== urn)); }; // Function to handle the modal action's @@ -313,7 +335,9 @@ export default function AddTagsTermsModal({ setIsFocusedOnInput(false)}> setIsFocusedOnInput(true)} onBlur={handleBlur} - dropdownStyle={isShowingGlossaryBrowser || !inputValue ? { display: 'none' } : {}} + dropdownStyle={isShowingGlossaryBrowser ? 
{ display: 'none' } : {}} > {tagSearchOptions} From be0bc0f0f0f22d5a0f5b78c960ffcfdaec9ecb0b Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Tue, 28 Jun 2022 17:27:42 -0400 Subject: [PATCH 12/18] fix(security) Sanitize rich text before sending to backend or rendering on frontend (#5278) --- datahub-web-react/package.json | 2 ++ .../Schema/utils/useDescriptionRenderer.tsx | 10 +++++--- .../tabs/Documentation/DocumentationTab.tsx | 8 +++--- .../__tests__/DocumentationTab.test.tsx | 25 +++++++++++++++++++ .../components/DescriptionEditor.tsx | 7 ++++-- datahub-web-react/yarn.lock | 17 +++++++++++++ 6 files changed, 61 insertions(+), 8 deletions(-) diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 64cb716a104b8d..6c2b12ba1fac56 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -22,6 +22,7 @@ "@testing-library/user-event": "^12.6.0", "@tommoor/remove-markdown": "^0.3.2", "@types/diff": "^5.0.0", + "@types/dompurify": "^2.3.3", "@types/jest": "^26.0.19", "@types/js-cookie": "^2.2.6", "@types/node": "^12.19.9", @@ -54,6 +55,7 @@ "d3-time-format": "^3.0.0", "deepmerge": "^4.2.2", "diff": "^5.0.0", + "dompurify": "^2.3.8", "dotenv": "^8.2.0", "faker": "5.5.3", "find-webpack": "2.2.1", diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx index 2e4d8f2c3d1695..f6736c4e2035e3 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx @@ -1,4 +1,5 @@ import React from 'react'; +import DOMPurify from 'dompurify'; import { EditableSchemaMetadata, SchemaField, SubResourceType } from '../../../../../../../types.generated'; import DescriptionField from '../../../../../dataset/profile/schema/components/SchemaDescriptionField'; 
import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils'; @@ -14,17 +15,20 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS const relevantEditableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find( (candidateEditableFieldInfo) => pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, record.fieldPath), ); + const displayedDescription = relevantEditableFieldInfo?.description || description; + const sanitizedDescription = DOMPurify.sanitize(displayedDescription); + const original = record.description ? DOMPurify.sanitize(record.description) : undefined; return ( updateDescription({ variables: { input: { - description: updatedDescription, + description: DOMPurify.sanitize(updatedDescription), resourceUrn: urn, subResource: record.fieldPath, subResourceType: SubResourceType.DatasetField, diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx index ac5b43f1f804fc..a7686b3d18dd8d 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/DocumentationTab.tsx @@ -6,6 +6,7 @@ import styled from 'styled-components'; import { Button, Divider, Typography } from 'antd'; import { EditOutlined } from '@ant-design/icons'; import MDEditor from '@uiw/react-md-editor'; +import DOMPurify from 'dompurify'; import TabToolbar from '../../components/styled/TabToolbar'; import { AddLinkModal } from '../../components/styled/AddLinkModal'; @@ -32,6 +33,7 @@ export const DocumentationTab = ({ properties }: { properties?: Props }) => { const { urn, entityData } = useEntityData(); const refetch = useRefetch(); const description = entityData?.editableProperties?.description || entityData?.properties?.description || ''; + const sanitizedDescription = DOMPurify.sanitize(description); const links = 
entityData?.institutionalMemory?.elements || []; const localStorageDictionary = localStorage.getItem(EDITED_DESCRIPTIONS_CACHE_NAME); @@ -51,7 +53,7 @@ export const DocumentationTab = ({ properties }: { properties?: Props }) => { ) : ( <> - {description || links.length ? ( + {sanitizedDescription || links.length ? ( <>
@@ -65,8 +67,8 @@ export const DocumentationTab = ({ properties }: { properties?: Props }) => {
- {description ? ( - + {sanitizedDescription ? ( + ) : ( No documentation added yet. )} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx index 0263cfd7e3a004..18119aa008294a 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx @@ -1,5 +1,6 @@ import { MockedProvider } from '@apollo/client/testing'; import { render } from '@testing-library/react'; +import DOMPurify from 'dompurify'; import React from 'react'; import { mocks } from '../../../../../../Mocks'; import { EntityType } from '../../../../../../types.generated'; @@ -62,3 +63,27 @@ describe('SchemaDescriptionField', () => { expect(queryByText('This is a description')).not.toBeInTheDocument(); }); }); + +describe('markdown sanitization', () => { + it('should remove malicious tags like '; + const sanitizedText = DOMPurify.sanitize(text); + + expect(sanitizedText).toBe('Testing this out'); + }); + + it('should allow acceptable html', () => { + const text = 'Testing this

out

for
safety
'; + const sanitizedText = DOMPurify.sanitize(text); + + expect(sanitizedText).toBe(text); + }); + + it('should allow acceptable markdown', () => { + const text = + '~~Testing~~ **this** *out* \n\n> for\n\n- safety\n\n1. ordered list\n\n[ test link](https://www.google.com/)\n'; + const sanitizedText = DOMPurify.sanitize(text); + + expect(sanitizedText).toBe(text); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditor.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditor.tsx index 7e91404455336f..1800320e0c5596 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditor.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditor.tsx @@ -1,6 +1,7 @@ import React, { useState, useEffect } from 'react'; import { message, Button } from 'antd'; import { CheckOutlined } from '@ant-design/icons'; +import DOMPurify from 'dompurify'; import analytics, { EventType, EntityActionType } from '../../../../../analytics'; @@ -31,16 +32,18 @@ export const DescriptionEditor = ({ onComplete }: { onComplete?: () => void }) = const [cancelModalVisible, setCancelModalVisible] = useState(false); const updateDescriptionLegacy = () => { + const sanitizedDescription = DOMPurify.sanitize(updatedDescription); return updateEntity?.({ - variables: { urn: mutationUrn, input: { editableProperties: { description: updatedDescription || '' } } }, + variables: { urn: mutationUrn, input: { editableProperties: { description: sanitizedDescription || '' } } }, }); }; const updateDescription = () => { + const sanitizedDescription = DOMPurify.sanitize(updatedDescription); return updateDescriptionMutation({ variables: { input: { - description: updatedDescription, + description: sanitizedDescription, resourceUrn: mutationUrn, }, }, diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 
8b1cc6ce75d96e..72cc134fc4c231 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -2826,6 +2826,13 @@ resolved "https://registry.yarnpkg.com/@types/diff/-/diff-5.0.0.tgz#eb71e94feae62548282c4889308a3dfb57e36020" integrity sha512-jrm2K65CokCCX4NmowtA+MfXyuprZC13jbRuwprs6/04z/EcFg/MCwYdsHn+zgV4CQBiATiI7AEq7y1sZCtWKA== +"@types/dompurify@^2.3.3": + version "2.3.3" + resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-2.3.3.tgz#c24c92f698f77ed9cc9d9fa7888f90cf2bfaa23f" + integrity sha512-nnVQSgRVuZ/843oAfhA25eRSNzUFcBPk/LOiw5gm8mD9/X7CNcbRkQu/OsjCewO8+VIYfPxUnXvPEVGenw14+w== + dependencies: + "@types/trusted-types" "*" + "@types/eslint@^7.2.6": version "7.2.11" resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-7.2.11.tgz#180b58f5bb7d7376e39d22496e2b08901aa52fd2" @@ -3131,6 +3138,11 @@ dependencies: "@types/jest" "*" +"@types/trusted-types@*": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.2.tgz#fc25ad9943bcac11cceb8168db4f275e0e72e756" + integrity sha512-F5DIZ36YVLE+PN+Zwws4kJogq47hNgX3Nx6WyDJ3kcplxyke3XIzB8uK5n/Lpm1HBsbGzd6nmGehL8cPekP+Tg== + "@types/uglify-js@*": version "3.13.0" resolved "https://registry.yarnpkg.com/@types/uglify-js/-/uglify-js-3.13.0.tgz#1cad8df1fb0b143c5aba08de5712ea9d1ff71124" @@ -6910,6 +6922,11 @@ domhandler@^2.3.0: dependencies: domelementtype "1" +dompurify@^2.3.8: + version "2.3.8" + resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.8.tgz#224fe9ae57d7ebd9a1ae1ac18c1c1ca3f532226f" + integrity sha512-eVhaWoVibIzqdGYjwsBWodIQIaXFSB+cKDf4cfxLMsK0xiud6SE+/WCVx/Xw/UwQsa4cS3T2eITcdtmTg2UKcw== + domutils@^1.5.1, domutils@^1.7.0: version "1.7.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a" From fdee7a787bdd90dd0825b54e4737649b15b738c8 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Tue, 28 Jun 2022 17:57:22 -0400 Subject: [PATCH 13/18] feat(GraphQL): Support for 
Deleting Domains, Tags via GraphQL API (#5272) --- .../datahub/graphql/GmsGraphQLEngine.java | 8 +- .../authorization/AuthorizationUtils.java | 54 +++++++++ .../datahub/graphql/resolvers/MeResolver.java | 4 + .../domain/CreateDomainResolver.java | 34 ++---- .../domain/DeleteDomainResolver.java | 55 ++++++++++ .../resolvers/domain/ListDomainsResolver.java | 2 +- .../resolvers/tag/CreateTagResolver.java | 80 ++++++++++++++ .../resolvers/tag/DeleteTagResolver.java | 58 ++++++++++ .../tag/mappers/TagUpdateInputMapper.java | 1 - .../src/main/resources/app.graphql | 15 +++ .../src/main/resources/entity.graphql | 45 +++++++- .../domain/CreateDomainResolverTest.java | 34 +++--- .../domain/DeleteDomainResolverTest.java | 56 ++++++++++ .../resolvers/tag/CreateTagResolverTest.java | 103 ++++++++++++++++++ .../resolvers/tag/DeleteTagResolverTest.java | 56 ++++++++++ datahub-web-react/src/Mocks.tsx | 14 +++ datahub-web-react/src/graphql/me.graphql | 3 + docs/policies.md | 5 +- .../war/src/main/resources/boot/policies.json | 6 +- .../authorization/PoliciesConfig.java | 30 ++++- 20 files changed, 615 insertions(+), 48 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolverTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 1996d3a577b508..4d47e7ce60ef61 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -84,6 +84,7 @@ import com.linkedin.datahub.graphql.resolvers.dataset.DatasetHealthResolver; import com.linkedin.datahub.graphql.resolvers.deprecation.UpdateDeprecationResolver; import com.linkedin.datahub.graphql.resolvers.domain.CreateDomainResolver; +import com.linkedin.datahub.graphql.resolvers.domain.DeleteDomainResolver; import com.linkedin.datahub.graphql.resolvers.domain.DomainEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.domain.ListDomainsResolver; import com.linkedin.datahub.graphql.resolvers.domain.SetDomainResolver; @@ -153,6 +154,8 @@ import com.linkedin.datahub.graphql.resolvers.search.SearchAcrossEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.search.SearchAcrossLineageResolver; import com.linkedin.datahub.graphql.resolvers.search.SearchResolver; +import com.linkedin.datahub.graphql.resolvers.tag.CreateTagResolver; +import com.linkedin.datahub.graphql.resolvers.tag.DeleteTagResolver; import com.linkedin.datahub.graphql.resolvers.tag.SetTagColorResolver; import com.linkedin.datahub.graphql.resolvers.test.CreateTestResolver; import com.linkedin.datahub.graphql.resolvers.test.DeleteTestResolver; @@ -672,8 +675,10 @@ private String getUrnField(DataFetchingEnvironment env) { private void configureMutationResolvers(final RuntimeWiring.Builder builder) { builder.type("Mutation", typeWiring -> typeWiring .dataFetcher("updateDataset", new MutableTypeResolver<>(datasetType)) + .dataFetcher("createTag", new CreateTagResolver(entityService)) .dataFetcher("updateTag", new MutableTypeResolver<>(tagType)) .dataFetcher("setTagColor", new SetTagColorResolver(entityClient, entityService)) + .dataFetcher("deleteTag", new 
DeleteTagResolver(entityClient)) .dataFetcher("updateChart", new MutableTypeResolver<>(chartType)) .dataFetcher("updateDashboard", new MutableTypeResolver<>(dashboardType)) .dataFetcher("updateNotebook", new MutableTypeResolver<>(notebookType)) @@ -702,7 +707,8 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("removeUser", new RemoveUserResolver(this.entityClient)) .dataFetcher("removeGroup", new RemoveGroupResolver(this.entityClient)) .dataFetcher("updateUserStatus", new UpdateUserStatusResolver(this.entityClient)) - .dataFetcher("createDomain", new CreateDomainResolver(this.entityClient)) + .dataFetcher("createDomain", new CreateDomainResolver(this.entityService)) + .dataFetcher("deleteDomain", new DeleteDomainResolver(entityClient)) .dataFetcher("setDomain", new SetDomainResolver(this.entityClient, this.entityService)) .dataFetcher("updateDeprecation", new UpdateDeprecationResolver(this.entityClient, this.entityService)) .dataFetcher("unsetDomain", new UnsetDomainResolver(this.entityClient, this.entityService)) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 0ff396374e1ede..d7a5940f9840c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -5,14 +5,24 @@ import com.datahub.authorization.Authorizer; import com.datahub.authorization.ResourceSpec; import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.metadata.authorization.PoliciesConfig; +import java.time.Clock; import java.util.Optional; import 
javax.annotation.Nonnull; public class AuthorizationUtils { + private static final Clock CLOCK = Clock.systemUTC(); + + public static AuditStamp createAuditStamp(@Nonnull QueryContext context) { + return new AuditStamp().setTime(CLOCK.millis()).setActor(UrnUtils.getUrn(context.getActorUrn())); + } + public static boolean canManageUsersAndGroups(@Nonnull QueryContext context) { return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_USERS_AND_GROUPS_PRIVILEGE); } @@ -25,6 +35,24 @@ public static boolean canManageTokens(@Nonnull QueryContext context) { return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_ACCESS_TOKENS); } + /** + * Returns true if the current user is able to create Domains. This is true if the user has the 'Manage Domains' or 'Create Domains' platform privilege. + */ + public static boolean canCreateDomains(@Nonnull QueryContext context) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( + ImmutableList.of( + new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.CREATE_DOMAINS_PRIVILEGE.getType())), + new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE.getType())) + )); + + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + orPrivilegeGroups); + } + public static boolean canManageDomains(@Nonnull QueryContext context) { return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE); } @@ -33,6 +61,32 @@ public static boolean canManageGlossaries(@Nonnull QueryContext context) { return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); } + /** + * Returns true if the current user is able to create Tags. This is true if the user has the 'Manage Tags' or 'Create Tags' platform privilege.
+ */ + public static boolean canCreateTags(@Nonnull QueryContext context) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( + ImmutableList.of( + new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.CREATE_TAGS_PRIVILEGE.getType())), + new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.MANAGE_TAGS_PRIVILEGE.getType())) + )); + + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + orPrivilegeGroups); + } + + public static boolean canManageTags(@Nonnull QueryContext context) { + return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_TAGS_PRIVILEGE); + } + + public static boolean canDeleteEntity(@Nonnull Urn entityUrn, @Nonnull QueryContext context) { + return isAuthorized(context, Optional.of(new ResourceSpec(entityUrn.getEntityType(), entityUrn.toString())), PoliciesConfig.DELETE_ENTITY_PRIVILEGE); + } + public static boolean canManageUserCredentials(@Nonnull QueryContext context) { return isAuthorized(context, Optional.empty(), PoliciesConfig.MANAGE_USER_CREDENTIALS_PRIVILEGE); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java index ecd6a422903b88..35ad96193263a8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java @@ -5,6 +5,7 @@ import com.datahub.authorization.Authorizer; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.AuthenticatedUser; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.PlatformPrivileges; @@ -65,6 +66,9 @@ public CompletableFuture 
get(DataFetchingEnvironment environm platformPrivileges.setManageTests(canManageTests(context)); platformPrivileges.setManageGlossaries(canManageGlossaries(context)); platformPrivileges.setManageUserCredentials(canManageUserCredentials(context)); + platformPrivileges.setCreateDomains(AuthorizationUtils.canCreateDomains(context)); + platformPrivileges.setCreateTags(AuthorizationUtils.canCreateTags(context)); + platformPrivileges.setManageTags(AuthorizationUtils.canManageTags(context)); // Construct and return authenticated user object. final AuthenticatedUser authUser = new AuthenticatedUser(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java index 3883a4b6f57693..4a435a8bfc9ef5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java @@ -1,19 +1,16 @@ package com.linkedin.datahub.graphql.resolvers.domain; -import com.google.common.collect.ImmutableList; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; -import com.linkedin.datahub.graphql.authorization.ConjunctivePrivilegeGroup; -import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateDomainInput; import com.linkedin.domain.DomainProperties; -import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; import 
com.linkedin.metadata.key.DomainKey; +import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; @@ -23,16 +20,17 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import static com.linkedin.datahub.graphql.authorization.AuthorizationUtils.*; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; /** - * Resolver used for creating a new Domain on DataHub. Requires the MANAGE_DOMAINS privilege. + * Resolver used for creating a new Domain on DataHub. Requires the CREATE_DOMAINS or MANAGE_DOMAINS privilege. */ @Slf4j @RequiredArgsConstructor public class CreateDomainResolver implements DataFetcher> { - private final EntityClient _entityClient; + private final EntityService _entityService; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -42,12 +40,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws return CompletableFuture.supplyAsync(() -> { - if (!isAuthorizedToCreateDomain(context)) { + if (!AuthorizationUtils.canCreateDomains(context)) { throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); } - // TODO: Add exists check. Currently this can override previously created domains. - try { // Create the Domain Key final DomainKey key = new DomainKey(); @@ -56,6 +52,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws final String id = input.getId() != null ? 
input.getId() : UUID.randomUUID().toString(); key.setId(id); + if (_entityService.exists(EntityKeyUtils.convertEntityKeyToUrn(key, Constants.DOMAIN_ENTITY_NAME))) { + throw new IllegalArgumentException("This Domain already exists!"); + } + // Create the MCP final MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityKeyAspect(GenericRecordUtils.serializeAspect(key)); @@ -63,7 +63,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws proposal.setAspectName(Constants.DOMAIN_PROPERTIES_ASPECT_NAME); proposal.setAspect(GenericRecordUtils.serializeAspect(mapDomainProperties(input))); proposal.setChangeType(ChangeType.UPSERT); - return _entityClient.ingestProposal(proposal, context.getAuthentication()); + + return _entityService.ingestProposal(proposal, createAuditStamp(context)).getUrn().toString(); } catch (Exception e) { log.error("Failed to create Domain with id: {}, name: {}: {}", input.getId(), input.getName(), e.getMessage()); throw new RuntimeException(String.format("Failed to create Domain with id: %s, name: %s", input.getId(), input.getName()), e); @@ -77,15 +78,4 @@ private DomainProperties mapDomainProperties(final CreateDomainInput input) { result.setDescription(input.getDescription(), SetMode.IGNORE_NULL); return result; } - - private boolean isAuthorizedToCreateDomain(final QueryContext context) { - final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup(ImmutableList.of( - new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE.getType())) - )); - - return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - orPrivilegeGroups); - } } \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java new file mode 100644 index 
00000000000000..e3f0ffd1ac7b13 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolver.java @@ -0,0 +1,55 @@ +package com.linkedin.datahub.graphql.resolvers.domain; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; + + +/** + * Resolver responsible for hard deleting a particular DataHub Domain + */ +@Slf4j +public class DeleteDomainResolver implements DataFetcher> { + + private final EntityClient _entityClient; + + public DeleteDomainResolver(final EntityClient entityClient) { + _entityClient = entityClient; + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final QueryContext context = environment.getContext(); + final String domainUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(domainUrn); + return CompletableFuture.supplyAsync(() -> { + + if (AuthorizationUtils.canManageDomains(context) || AuthorizationUtils.canDeleteEntity(urn, context)) { + try { + _entityClient.deleteEntity(urn, context.getAuthentication()); + + // Asynchronously Delete all references to the entity (to return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(urn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format("Caught exception while attempting to clear all entity references for Domain with urn %s", urn), e); + } + }); + + return true; + } catch (Exception e) { + throw new
RuntimeException(String.format("Failed to perform delete against domain with urn %s", domainUrn), e); + } + } + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java index 076387449816ac..0cf6d70e6909ec 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/ListDomainsResolver.java @@ -45,7 +45,7 @@ public CompletableFuture get(final DataFetchingEnvironment en return CompletableFuture.supplyAsync(() -> { - if (AuthorizationUtils.canManageDomains(context)) { + if (AuthorizationUtils.canCreateDomains(context)) { final ListDomainsInput input = bindArgument(environment.getArgument("input"), ListDomainsInput.class); final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? 
DEFAULT_COUNT : input.getCount(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java new file mode 100644 index 00000000000000..22e3f9187505d3 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java @@ -0,0 +1,80 @@ +package com.linkedin.datahub.graphql.resolvers.tag; + +import com.linkedin.data.template.SetMode; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.CreateTagInput; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.key.TagKey; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.tag.TagProperties; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.authorization.AuthorizationUtils.*; +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + +/** + * Resolver used for creating a new Tag on DataHub. Requires the CREATE_TAG or MANAGE_TAGS privilege. 
+ */ +@Slf4j +@RequiredArgsConstructor +public class CreateTagResolver implements DataFetcher> { + + private final EntityService _entityService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + + final QueryContext context = environment.getContext(); + final CreateTagInput input = bindArgument(environment.getArgument("input"), CreateTagInput.class); + + return CompletableFuture.supplyAsync(() -> { + + if (!AuthorizationUtils.canCreateTags(context)) { + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + } + + try { + // Create the Tag Key + final TagKey key = new TagKey(); + + // Take user provided id OR generate a random UUID for the Tag. + final String id = input.getId() != null ? input.getId() : UUID.randomUUID().toString(); + key.setName(id); + + if (_entityService.exists(EntityKeyUtils.convertEntityKeyToUrn(key, Constants.TAG_ENTITY_NAME))) { + throw new IllegalArgumentException("This Tag already exists!"); + } + + // Create the MCP + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityKeyAspect(GenericRecordUtils.serializeAspect(key)); + proposal.setEntityType(Constants.TAG_ENTITY_NAME); + proposal.setAspectName(Constants.TAG_PROPERTIES_ASPECT_NAME); + proposal.setAspect(GenericRecordUtils.serializeAspect(mapTagProperties(input))); + proposal.setChangeType(ChangeType.UPSERT); + return _entityService.ingestProposal(proposal, createAuditStamp(context)).getUrn().toString(); + } catch (Exception e) { + log.error("Failed to create Domain with id: {}, name: {}: {}", input.getId(), input.getName(), e.getMessage()); + throw new RuntimeException(String.format("Failed to create Domain with id: %s, name: %s", input.getId(), input.getName()), e); + } + }); + } + + private TagProperties mapTagProperties(final CreateTagInput input) { + final TagProperties result = new TagProperties(); + 
result.setName(input.getName()); + result.setDescription(input.getDescription(), SetMode.IGNORE_NULL); + return result; + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java new file mode 100644 index 00000000000000..72b95935838ef5 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolver.java @@ -0,0 +1,58 @@ +package com.linkedin.datahub.graphql.resolvers.tag; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.r2.RemoteInvocationException; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; + + +/** + * Resolver responsible for hard deleting a particular DataHub Tag + */ +@Slf4j +public class DeleteTagResolver implements DataFetcher> { + + private final EntityClient _entityClient; + + public DeleteTagResolver(final EntityClient entityClient) { + _entityClient = entityClient; + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final QueryContext context = environment.getContext(); + final String tagUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(tagUrn); + + return CompletableFuture.supplyAsync(() -> { + + if (AuthorizationUtils.canManageTags(context) || AuthorizationUtils.canDeleteEntity(UrnUtils.getUrn(tagUrn), context)) { + try { + _entityClient.deleteEntity(urn, context.getAuthentication()); + + // Asynchronously
Delete all references to the entity (to return quickly) + CompletableFuture.runAsync(() -> { + try { + _entityClient.deleteEntityReferences(urn, context.getAuthentication()); + } catch (RemoteInvocationException e) { + log.error(String.format( + "Caught exception while attempting to clear all entity references for Tag with urn %s", urn), e); + } + }); + + return true; + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to perform delete against domain with urn %s", tagUrn), e); + } + } + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagUpdateInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagUpdateInputMapper.java index 264f4041f88cfb..b666bf5c60ed55 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagUpdateInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagUpdateInputMapper.java @@ -42,7 +42,6 @@ public Collection apply( auditStamp.setActor(actor, SetMode.IGNORE_NULL); auditStamp.setTime(System.currentTimeMillis()); - // Creator is the owner. final Ownership ownership = new Ownership(); final Owner owner = new Owner(); diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index c183bd67006006..8f5e5e7a03f863 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -52,6 +52,11 @@ type PlatformPrivileges { """ generatePersonalAccessTokens: Boolean! + """ + Whether the user should be able to create new Domains + """ + createDomains: Boolean! 
+ """ Whether the user should be able to manage Domains """ @@ -86,6 +91,16 @@ type PlatformPrivileges { Whether the user is able to manage user credentials """ manageUserCredentials: Boolean! + + """ + Whether the user should be able to create new Tags + """ + createTags: Boolean! + + """ + Whether the user should be able to create and delete all Tags + """ + manageTags: Boolean! } """ diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index ddfdc8e17222d7..f8941f681a8af9 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -210,11 +210,26 @@ type Mutation { """ updateDataJob(urn: String!, input: DataJobUpdateInput!): DataJob + """ + Create a new tag. Requires the 'Manage Tags' or 'Create Tags' Platform Privilege. If a Tag with the provided ID already exists, + it will be overwritten. + """ + createTag( + "Inputs required to create a new Tag." + input: CreateTagInput!): String + """ Update the information about a particular Entity Tag """ updateTag(urn: String!, input: TagUpdateInput!): Tag + """ + Delete a Tag + """ + deleteTag( + "The urn of the Tag to delete" + urn: String!): Boolean + """ Set the hex color associated with an existing Tag """ @@ -326,11 +341,19 @@ type Mutation { createGroup(input: CreateGroupInput!): String """ - Create a new Domain. Returns the urn of the newly created Domain. Requires the Manage Domains privilege. If a domain with the provided ID already exists, + Create a new Domain. Returns the urn of the newly created Domain. Requires the 'Create Domains' or 'Manage Domains' Platform Privilege. If a Domain with the provided ID already exists, it will be overwritten. 
""" createDomain(input: CreateDomainInput!): String + """ + Delete a Domain + """ + deleteDomain( + "The urn of the Domain to delete" + urn: String!): Boolean + + """ Sets the Domain for a Dataset, Chart, Dashboard, Data Flow (Pipeline), or Data Job (Task). Returns true if the Domain was successfully added, or already exists. Requires the Edit Domains privilege for the Entity. """ @@ -3615,6 +3638,26 @@ input TagUpdateInput { ownership: OwnershipUpdate } +""" +Input required to create a new Tag +""" +input CreateTagInput { + """ + Optional! A custom id to use as the primary key identifier for the Tag. If not provided, a random UUID will be generated as the id. + """ + id: String + + """ + Display name for the Tag + """ + name: String! + + """ + Optional description for the Tag + """ + description: String +} + """ An update for the ownership information for a Metadata Entity """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java index 8fce428519c4b5..a1dbd4ae064dd7 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolverTest.java @@ -1,16 +1,16 @@ package com.linkedin.datahub.graphql.resolvers.domain; -import com.datahub.authentication.Authentication; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.CreateDomainInput; import com.linkedin.domain.DomainProperties; -import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; import 
com.linkedin.metadata.key.DomainKey; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -31,8 +31,12 @@ public class CreateDomainResolverTest { @Test public void testGetSuccess() throws Exception { // Create resolver - EntityClient mockClient = Mockito.mock(EntityClient.class); - CreateDomainResolver resolver = new CreateDomainResolver(mockClient); + EntityService mockService = Mockito.mock(EntityService.class); + Mockito.when(mockService.ingestProposal(Mockito.any(MetadataChangeProposal.class), Mockito.any(AuditStamp.class))) + .thenReturn(new EntityService.IngestProposalResult(UrnUtils.getUrn( + String.format("urn:li:tag:%s", + TEST_INPUT.getId())), true)); + CreateDomainResolver resolver = new CreateDomainResolver(mockService); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -55,17 +59,17 @@ public void testGetSuccess() throws Exception { proposal.setChangeType(ChangeType.UPSERT); // Not ideal to match against "any", but we don't know the auto-generated execution request id - Mockito.verify(mockClient, Mockito.times(1)).ingestProposal( + Mockito.verify(mockService, Mockito.times(1)).ingestProposal( Mockito.eq(proposal), - Mockito.any(Authentication.class) + Mockito.any(AuditStamp.class) ); } @Test public void testGetUnauthorized() throws Exception { // Create resolver - EntityClient mockClient = Mockito.mock(EntityClient.class); - CreateDomainResolver resolver = new CreateDomainResolver(mockClient); + EntityService mockService = Mockito.mock(EntityService.class); + CreateDomainResolver resolver = new CreateDomainResolver(mockService); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -74,19 +78,19 @@ public void testGetUnauthorized() throws Exception { 
Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - Mockito.verify(mockClient, Mockito.times(0)).ingestProposal( + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( Mockito.any(), - Mockito.any(Authentication.class)); + Mockito.any(AuditStamp.class)); } @Test public void testGetEntityClientException() throws Exception { // Create resolver - EntityClient mockClient = Mockito.mock(EntityClient.class); - Mockito.doThrow(RemoteInvocationException.class).when(mockClient).ingestProposal( + EntityService mockService = Mockito.mock(EntityService.class); + Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( Mockito.any(), - Mockito.any(Authentication.class)); - CreateDomainResolver resolver = new CreateDomainResolver(mockClient); + Mockito.any(AuditStamp.class)); + CreateDomainResolver resolver = new CreateDomainResolver(mockService); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolverTest.java new file mode 100644 index 00000000000000..1c450b0e85424d --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DeleteDomainResolverTest.java @@ -0,0 +1,56 @@ +package com.linkedin.datahub.graphql.resolvers.domain; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.entity.client.EntityClient; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static 
org.testng.Assert.*; + + +public class DeleteDomainResolverTest { + + private static final String TEST_URN = "urn:li:domain:test-id"; + + @Test + public void testGetSuccess() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + DeleteDomainResolver resolver = new DeleteDomainResolver(mockClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("urn"))).thenReturn(TEST_URN); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockClient, Mockito.times(1)).deleteEntity( + Mockito.eq(Urn.createFromString(TEST_URN)), + Mockito.any(Authentication.class) + ); + } + + @Test + public void testGetUnauthorized() throws Exception { + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + DeleteDomainResolver resolver = new DeleteDomainResolver(mockClient); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("urn"))).thenReturn(TEST_URN); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockClient, Mockito.times(0)).deleteEntity( + Mockito.any(), + Mockito.any(Authentication.class)); + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java new file mode 100644 index 00000000000000..91217dfc1e2e46 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolverTest.java @@ -0,0 +1,103 @@ 
+package com.linkedin.datahub.graphql.resolvers.tag; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.CreateTagInput; +import com.linkedin.tag.TagProperties; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.key.TagKey; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.testng.Assert.*; + + +public class CreateTagResolverTest { + + private static final CreateTagInput TEST_INPUT = new CreateTagInput( + "test-id", + "test-name", + "test-description" + ); + + @Test + public void testGetSuccess() throws Exception { + // Create resolver + EntityService mockService = Mockito.mock(EntityService.class); + Mockito.when(mockService.ingestProposal(Mockito.any(MetadataChangeProposal.class), Mockito.any(AuditStamp.class))) + .thenReturn(new EntityService.IngestProposalResult(UrnUtils.getUrn( + String.format("urn:li:tag:%s", + TEST_INPUT.getId())), true)); + CreateTagResolver resolver = new CreateTagResolver(mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + resolver.get(mockEnv).get(); + + final TagKey key = new TagKey(); + key.setName("test-id"); + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + 
proposal.setEntityKeyAspect(GenericRecordUtils.serializeAspect(key)); + proposal.setEntityType(Constants.TAG_ENTITY_NAME); + TagProperties props = new TagProperties(); + props.setDescription("test-description"); + props.setName("test-name"); + proposal.setAspectName(Constants.TAG_PROPERTIES_ASPECT_NAME); + proposal.setAspect(GenericRecordUtils.serializeAspect(props)); + proposal.setChangeType(ChangeType.UPSERT); + + // Not ideal to match against "any", but we don't know the auto-generated execution request id + Mockito.verify(mockService, Mockito.times(1)).ingestProposal( + Mockito.eq(proposal), + Mockito.any(AuditStamp.class) + ); + } + + @Test + public void testGetUnauthorized() throws Exception { + // Create resolver + EntityService mockService = Mockito.mock(EntityService.class); + CreateTagResolver resolver = new CreateTagResolver(mockService); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockService, Mockito.times(0)).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + } + + @Test + public void testGetEntityClientException() throws Exception { + // Create resolver + EntityService mockService = Mockito.mock(EntityService.class); + Mockito.doThrow(RuntimeException.class).when(mockService).ingestProposal( + Mockito.any(), + Mockito.any(AuditStamp.class)); + CreateTagResolver resolver = new CreateTagResolver(mockService); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockAllowContext(); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); + 
Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolverTest.java new file mode 100644 index 00000000000000..b01ac1a9b14ae9 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/DeleteTagResolverTest.java @@ -0,0 +1,56 @@ +package com.linkedin.datahub.graphql.resolvers.tag; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.entity.client.EntityClient; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.testng.Assert.*; + + +public class DeleteTagResolverTest { + + private static final String TEST_URN = "urn:li:tag:test-id"; + + @Test + public void testGetSuccess() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + DeleteTagResolver resolver = new DeleteTagResolver(mockClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("urn"))).thenReturn(TEST_URN); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockClient, Mockito.times(1)).deleteEntity( + Mockito.eq(Urn.createFromString(TEST_URN)), + Mockito.any(Authentication.class) + ); + } + + @Test + public void testGetUnauthorized() throws Exception { + // Create resolver + EntityClient 
mockClient = Mockito.mock(EntityClient.class); + DeleteTagResolver resolver = new DeleteTagResolver(mockClient); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("urn"))).thenReturn(TEST_URN); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockClient, Mockito.times(0)).deleteEntity( + Mockito.any(), + Mockito.any(Authentication.class)); + } +} \ No newline at end of file diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index cab81edff9b191..a1b65f3e2b6acb 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -3084,6 +3084,17 @@ export const mocks = [ viewAnalytics: true, managePolicies: true, manageIdentities: true, + manageDomains: true, + manageTags: true, + createDomains: true, + createTags: true, + manageUserCredentials: true, + manageGlossaries: true, + manageTests: true, + manageTokens: true, + manageSecrets: true, + manageIngestion: true, + generatePersonalAccessTokens: true, }, }, }, @@ -3303,4 +3314,7 @@ export const platformPrivileges: PlatformPrivileges = { manageTests: true, manageGlossaries: true, manageUserCredentials: true, + manageTags: true, + createTags: true, + createDomains: true, }; diff --git a/datahub-web-react/src/graphql/me.graphql b/datahub-web-react/src/graphql/me.graphql index 7f1d426f3d6a7c..d28b1a60cd5048 100644 --- a/datahub-web-react/src/graphql/me.graphql +++ b/datahub-web-react/src/graphql/me.graphql @@ -31,6 +31,9 @@ query getMe { manageTests manageGlossaries manageUserCredentials + manageTags + createDomains + createTags } } } diff --git a/docs/policies.md b/docs/policies.md index da2078e3c840f3..32a89aca49384f 100644 --- a/docs/policies.md +++ b/docs/policies.md @@ -77,11 +77,14 @@ We currently support the 
following: | Manage Secrets | Allow actor to create & remove secrets stored inside DataHub. | | Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | | Manage All Access Tokens | Allow actor to create, remove, and list access tokens for all users on DataHub. | -| Manage Domains | Allow actor to create and remove Asset Domains. | +| Create Domains | Allow the actor to create new Domains | +| Manage Domains | Allow actor to create and remove any Domains. | | View Analytics | Allow the actor access to the DataHub analytics dashboard. | | Generate Personal Access Tokens | Allow the actor to generate access tokens for personal use with DataHub APIs. | | Manage User Credentials | Allow the actor to generate invite links for new native DataHub users, and password reset links for existing native users. | | Manage Glossaries | Allow the actor to create, edit, move, and delete Glossary Terms and Term Groups | +| Create Tags | Allow the actor to create new Tags | +| Manage Tags | Allow the actor to create and remove any Tags | **Common metadata privileges** to view & modify any entity within DataHub. 
diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index ae154bd132e2b1..643f7a5e882a86 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -21,7 +21,8 @@ "MANAGE_DOMAINS", "MANAGE_TESTS", "MANAGE_GLOSSARIES", - "MANAGE_USER_CREDENTIALS" + "MANAGE_USER_CREDENTIALS", + "MANAGE_TAGS" ], "displayName":"Root User - All Platform Privileges", "description":"Grants full platform privileges to root datahub super user.", @@ -84,7 +85,8 @@ "GENERATE_PERSONAL_ACCESS_TOKENS", "MANAGE_DOMAINS", "MANAGE_TESTS", - "MANAGE_GLOSSARIES" + "MANAGE_GLOSSARIES", + "MANAGE_TAGS" ], "displayName":"All Users - All Platform Privileges", "description":"Grants full platform privileges to ALL users of DataHub. Change this policy to alter that behavior.", diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 20b2074a160645..7c870e51834102 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -52,7 +52,6 @@ public class PoliciesConfig { "Generate Personal Access Tokens", "Generate personal access tokens for use with DataHub APIs."); - public static final Privilege MANAGE_ACCESS_TOKENS = Privilege.of( "MANAGE_ACCESS_TOKENS", "Manage All Access Tokens", @@ -79,6 +78,21 @@ public class PoliciesConfig { Privilege.of("MANAGE_USER_CREDENTIALS", "Manage User Credentials", "Manage credentials for native DataHub users, including inviting new users and resetting passwords"); + public static final Privilege MANAGE_TAGS_PRIVILEGE = Privilege.of( + "MANAGE_TAGS", + "Manage Tags", + "Create and remove Tags."); + + public static final Privilege CREATE_TAGS_PRIVILEGE = 
Privilege.of( + "CREATE_TAGS", + "Create Tags", + "Create new Tags."); + + public static final Privilege CREATE_DOMAINS_PRIVILEGE = Privilege.of( + "CREATE_DOMAINS", + "Create Domains", + "Create new Domains."); + public static final List PLATFORM_PRIVILEGES = ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE, @@ -90,7 +104,10 @@ public class PoliciesConfig { MANAGE_ACCESS_TOKENS, MANAGE_TESTS_PRIVILEGE, MANAGE_GLOSSARIES_PRIVILEGE, - MANAGE_USER_CREDENTIALS_PRIVILEGE + MANAGE_USER_CREDENTIALS_PRIVILEGE, + MANAGE_TAGS_PRIVILEGE, + CREATE_TAGS_PRIVILEGE, + CREATE_DOMAINS_PRIVILEGE ); // Resource Privileges // @@ -155,6 +172,11 @@ public class PoliciesConfig { "Edit All", "The ability to edit any information about an entity. Super user privileges."); + public static final Privilege DELETE_ENTITY_PRIVILEGE = Privilege.of( + "DELETE_ENTITY", + "Delete", + "The ability to delete this entity."); + public static final List COMMON_ENTITY_PRIVILEGES = ImmutableList.of( VIEW_ENTITY_PAGE_PRIVILEGE, EDIT_ENTITY_TAGS_PRIVILEGE, @@ -283,7 +305,7 @@ public class PoliciesConfig { "Tags", "Tags indexed by DataHub", ImmutableList.of(VIEW_ENTITY_PAGE_PRIVILEGE, EDIT_ENTITY_OWNERS_PRIVILEGE, EDIT_TAG_COLOR_PRIVILEGE, - EDIT_ENTITY_DOCS_PRIVILEGE, EDIT_ENTITY_PRIVILEGE) + EDIT_ENTITY_DOCS_PRIVILEGE, EDIT_ENTITY_PRIVILEGE, DELETE_ENTITY_PRIVILEGE) ); // Container Privileges @@ -300,7 +322,7 @@ public class PoliciesConfig { "Domains", "Domains created on DataHub", ImmutableList.of(VIEW_ENTITY_PAGE_PRIVILEGE, EDIT_ENTITY_OWNERS_PRIVILEGE, EDIT_ENTITY_DOCS_PRIVILEGE, - EDIT_ENTITY_DOC_LINKS_PRIVILEGE, EDIT_ENTITY_PRIVILEGE) + EDIT_ENTITY_DOC_LINKS_PRIVILEGE, EDIT_ENTITY_PRIVILEGE, DELETE_ENTITY_PRIVILEGE) ); // Glossary Term Privileges From b73477f31e96d69f43ed307432b66b265772e076 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 29 Jun 2022 15:26:12 +0530 Subject: [PATCH 14/18] feat(build): reduce build time for ingestion image (#5225) ---
.github/workflows/metadata-ingestion-slow.yml | 51 ------------------- docker/datahub-ingestion/Dockerfile | 12 +---- 2 files changed, 2 insertions(+), 61 deletions(-) delete mode 100644 .github/workflows/metadata-ingestion-slow.yml diff --git a/.github/workflows/metadata-ingestion-slow.yml b/.github/workflows/metadata-ingestion-slow.yml deleted file mode 100644 index 3c020d1d277dcc..00000000000000 --- a/.github/workflows/metadata-ingestion-slow.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: metadata ingestion slow integration tests -on: - push: - branches: - - master - paths-ignore: - - "docs/**" - - "**.md" - pull_request: - branches: - - master - paths: - - "**/nifi/**" - - "**/nifi.py" - - "**/hana/**" - - "**/hana.py" - release: - types: [published, edited] - -jobs: - metadata-ingestion-slow-integration: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9"] - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: ./metadata-ingestion/scripts/install_deps.sh - - name: Run metadata-ingestion slow integration tests - run: ./gradlew :metadata-ingestion:testSlowIntegration - - uses: actions/upload-artifact@v2 - if: always() - with: - name: Test Results (metadata ingestion slow integration tests) - path: | - **/build/reports/tests/test/** - **/build/test-results/test/** - **/junit.*.xml - - event-file: - runs-on: ubuntu-latest - steps: - - name: Upload - uses: actions/upload-artifact@v2 - with: - name: Event File - path: ${{ github.event_path }} diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 3cae6ecf16570e..899919b2978c41 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -1,20 +1,11 @@ # Defining environment ARG APP_ENV=prod -FROM python:3.8 as base +FROM acryldata/datahub-ingestion-base as base # ENV DOCKERIZE_VERSION v0.6.1 # RUN apk --no-cache add curl 
tar \ # && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \ # && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv -RUN apt-get update && apt-get install -y \ - jq \ - librdkafka-dev \ - python3-ldap \ - libldap2-dev \ - libsasl2-dev \ - libsasl2-modules \ - ldap-utils \ - && python -m pip install --upgrade pip wheel setuptools==57.5.0 FROM openjdk:8 as prod-build @@ -29,6 +20,7 @@ RUN cd /datahub-src/metadata-ingestion && \ FROM base as prod-install COPY --from=prod-codegen /datahub-src/metadata-ingestion /datahub-ingestion +COPY --from=prod-codegen /root/.cache/pip /root/.cache/pip ARG RELEASE_VERSION RUN cd /datahub-ingestion && \ sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ From fb1b1db7f77e63b5cb49cfe3fa33e5be44702eff Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 29 Jun 2022 14:00:52 +0200 Subject: [PATCH 15/18] fix(ingestion): profiling - Fixing partitioned table profiling in BQ (#5283) --- .../ingestion/source/ge_data_profiler.py | 19 +++++++++++++++--- .../datahub/ingestion/source/sql/bigquery.py | 20 ++++++++++++++----- .../ingestion/source/usage/bigquery_usage.py | 8 +++++--- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index a486b3bb545aaf..c2dd7c0f15d59b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -11,6 +11,7 @@ from great_expectations import __version__ as ge_version +from datahub.configuration.common import ConfigurationError from datahub.telemetry import stats, telemetry # Fun compatibility hack! 
GE version 0.13.44 broke compatibility with SQLAlchemy 1.3.24. @@ -872,7 +873,16 @@ def _generate_single_profile( ge_config["schema"] = temp_table_db if self.config.bigquery_temp_table_schema: - bigquery_temp_table = f"{temp_table_db}.{self.config.bigquery_temp_table_schema}.ge-temp-{uuid.uuid4()}" + num_parts = self.config.bigquery_temp_table_schema.split(".") + # If we only have 1 part that means the project_id is missing from the table name and we add it + if len(num_parts) == 1: + bigquery_temp_table = f"{temp_table_db}.{self.config.bigquery_temp_table_schema}.ge-temp-{uuid.uuid4()}" + elif len(num_parts) == 2: + bigquery_temp_table = f"{self.config.bigquery_temp_table_schema}.ge-temp-{uuid.uuid4()}" + else: + raise ConfigurationError( + f"bigquery_temp_table_schema should be either project.dataset or dataset format but it was: {self.config.bigquery_temp_table_schema}" + ) else: assert table table_parts = table.split(".") @@ -970,12 +980,15 @@ def _get_ge_dataset( if platform is not None and platform == "bigquery": # This is done as GE makes the name as DATASET.TABLE # but we want it to be PROJECT.DATASET.TABLE instead for multi-project setups - logger.debug(f"Setting table name to be {pretty_name}") - batch._table = sa.text(pretty_name) name_parts = pretty_name.split(".") if len(name_parts) != 3: logger.error( f"Unexpected {pretty_name} while profiling. Should have 3 parts but has {len(name_parts)} parts." 
) + # If we only have two parts that means the project_id is missing from the table name and we add it + # Temp tables has 3 parts while normal tables only has 2 parts + if len(str(batch._table).split(".")) == 2: + batch._table = sa.text(f"{name_parts[0]}.{str(batch._table)}") + logger.debug(f"Setting table name to be {batch._table}") return batch diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index cbbf3e561398ad..8aea67cc72bba8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -160,6 +160,11 @@ WHERE table_id LIKE '{table}%' """.strip() +BQ_GET_LATEST_DATE_TABLE = """ +SELECT MAX(table_name) as max_shard +FROM `{project_id}.{schema}.INFORMATION_SCHEMA.TABLES` +where REGEXP_CONTAINS(table_name, r'^\\d{{{date_length}}}$') +""".strip() # The existing implementation of this method can be found here: # https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/base.py#L1018-L1025. 
@@ -707,11 +712,16 @@ def is_latest_shard(self, project_id: str, schema: str, table: str) -> bool: engine = self._get_engine(for_run_sql=True) if f"{project_id}.{schema}.{table_name}" not in self.maximum_shard_ids: with engine.connect() as con: - sql = BQ_GET_LATEST_SHARD.format( - project_id=project_id, - schema=schema, - table=table_name, - ) + if table_name is not None: + sql = BQ_GET_LATEST_SHARD.format( + project_id=project_id, + schema=schema, + table=table_name, + ) + else: + sql = BQ_GET_LATEST_DATE_TABLE.format( + project_id=project_id, schema=schema, date_length=len(shard) + ) result = con.execute(sql) for row in result: diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py index 2fe25679a9ab14..5f49d06bb0aeb1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py @@ -281,14 +281,16 @@ def remove_extras(self, sharded_table_regex: str) -> "BigQueryTableRef": if matches: table_name = matches.group(1) if matches: - logger.debug( - f"Found sharded table {self.table}. Using {table_name} as the table name." - ) if not table_name: logger.debug( f"Using dataset id {self.dataset} as table name because table only contains date value {self.table}" ) table_name = self.dataset + + logger.debug( + f"Found sharded table {self.table}. Using {table_name} as the table name." + ) + return BigQueryTableRef(self.project, self.dataset, table_name) # Handle table snapshots. 
From d812fd18fa187d03ca490547671b2a1b87f82648 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 29 Jun 2022 16:25:48 +0200 Subject: [PATCH 16/18] fix(ingest) redshift: Adding missing dependencies and relaxing sqlalchemy dependency (#5284) Relaxing sqlalchemy deps to make our plugins work with Airflow 2.3 --- metadata-ingestion/setup.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 053aed2acf8f75..e4703081fa71be 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -88,7 +88,7 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy==1.3.24", + "sqlalchemy>=1.3.24,<2.0.0", # Required for SQL profiling. "great-expectations>=0.14.11,<0.15.3", # datahub does not depend on Jinja2 directly but great expectations does. With Jinja2 3.1.0 GE 0.14.11 is breaking @@ -107,6 +107,11 @@ def get_long_description(): "botocore!=1.23.0", } +path_spec_common = { + "parse>=1.19.0", + "wcmatch", +} + looker_common = { # Looker Python SDK "looker-sdk==22.2.1" @@ -121,6 +126,14 @@ def get_long_description(): "protobuf<=3.20.1", } +redshift_common = { + "sqlalchemy-redshift", + "psycopg2-binary", + "GeoAlchemy2", + "sqllineage==1.3.5", + *path_spec_common, +} + snowflake_common = { # Snowflake plugin utilizes sql common *sql_common, @@ -158,11 +171,7 @@ def get_long_description(): "azure-identity==1.10.0", } -s3_base = { - *data_lake_base, - "moto[s3]", - "wcmatch", -} +s3_base = {*data_lake_base, "moto[s3]", *path_spec_common} delta_lake = { *s3_base, @@ -173,6 +182,7 @@ def get_long_description(): "sqlparse", } + # Note: for all of these, framework_common will be added. plugins: Dict[str, Set[str]] = { # Sink plugins. 
@@ -249,16 +259,8 @@ def get_long_description(): | {"psycopg2-binary", "acryl-pyhive[hive]>=0.6.12", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata", "sqllineage==1.3.5"}, - "redshift": sql_common - | {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2", "sqllineage==1.3.5"}, - "redshift-usage": sql_common - | usage_common - | { - "sqlalchemy-redshift", - "psycopg2-binary", - "GeoAlchemy2", - "sqllineage==1.3.5", - }, + "redshift": sql_common | redshift_common, + "redshift-usage": sql_common | usage_common | redshift_common, "sagemaker": aws_common, "snowflake": snowflake_common, "snowflake-usage": snowflake_common @@ -454,7 +456,7 @@ def get_long_description(): entry_points = { "console_scripts": ["datahub = datahub.entrypoints:main"], "datahub.ingestion.source.plugins": [ - "csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource", + "csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource", "file = datahub.ingestion.source.file:GenericFileSource", "sqlalchemy = datahub.ingestion.source.sql.sql_generic:SQLAlchemyGenericSource", "athena = datahub.ingestion.source.sql.athena:AthenaSource", From c1f822769312a8ebc91a2c85873e44fd57431480 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 29 Jun 2022 18:23:36 +0200 Subject: [PATCH 17/18] fix(ingestion): Reverting sqlalchemy upgrade because it caused issues with mssql and redshift-usage (#5289) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index e4703081fa71be..aca12aaebefd05 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -88,7 +88,7 @@ def get_long_description(): sql_common = { # Required for all SQL sources. - "sqlalchemy>=1.3.24,<2.0.0", + "sqlalchemy==1.3.24", # Required for SQL profiling. "great-expectations>=0.14.11,<0.15.3", # datahub does not depend on Jinja2 directly but great expectations does. 
With Jinja2 3.1.0 GE 0.14.11 is breaking From 9e58cd6ff12bb1611972ecd3dfd8767c52d07af7 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Wed, 29 Jun 2022 10:02:45 -0700 Subject: [PATCH 18/18] fix(Siblings): Have sibling hook use entity client (#5279) * fixing dbt platform issues * have sibling hook use entity client over entity service * switching search service as well * lint * more lint * more specific exceptions --- .../hook/siblings/SiblingAssociationHook.java | 137 ++++++++++++++---- .../siblings/SiblingAssociationHookTest.java | 90 +++++++----- ...com.linkedin.entity.entities.restspec.json | 7 + ...com.linkedin.entity.entities.snapshot.json | 7 + .../linkedin/entity/client/EntityClient.java | 2 + .../entity/client/JavaEntityClient.java | 5 + .../entity/client/RestliEntityClient.java | 8 + .../resources/entity/EntityResource.java | 11 ++ .../example_siblings_to_datahub_rest.yml | 11 ++ 9 files changed, 215 insertions(+), 63 deletions(-) create mode 100644 smoke-test/tests/cypress/example_siblings_to_datahub_rest.yml diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index f656b1ab66b0d3..d8808c5b5abeb8 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -1,7 +1,9 @@ package com.linkedin.metadata.kafka.hook.siblings; +import com.datahub.authentication.Authentication; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.Siblings; import com.linkedin.common.SubTypes; @@ -10,21 +12,25 @@ import com.linkedin.common.urn.Urn; import 
com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.RestliEntityClient; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.gms.factory.entity.EntityServiceFactory; +import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; +import com.linkedin.gms.factory.entity.RestliEntityClientFactory; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; -import com.linkedin.gms.factory.search.SearchServiceFactory; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.gms.factory.search.EntitySearchServiceFactory; +import com.linkedin.metadata.Constants; import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchResult; -import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.r2.RemoteInvocationException; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -53,7 +59,7 @@ @Slf4j @Component @Singleton -@Import({EntityRegistryFactory.class, EntityServiceFactory.class, SearchServiceFactory.class}) +@Import({EntityRegistryFactory.class, RestliEntityClientFactory.class, EntitySearchServiceFactory.class, SystemAuthenticationFactory.class}) public class SiblingAssociationHook implements MetadataChangeLogHook { public static final String SIBLING_ASSOCIATION_SYSTEM_ACTOR = "urn:li:corpuser:__datahub_system_sibling_hook"; @@ -61,18 +67,21 @@ public class SiblingAssociationHook implements MetadataChangeLogHook { public 
static final String SOURCE_SUBTYPE = "source"; private final EntityRegistry _entityRegistry; - private final EntityService _entityService; - private final SearchService _searchService; + private final RestliEntityClient _entityClient; + private final EntitySearchService _searchService; + private final Authentication _systemAuthentication; @Autowired public SiblingAssociationHook( @Nonnull final EntityRegistry entityRegistry, - @Nonnull final EntityService entityService, - @Nonnull final SearchService searchService + @Nonnull final RestliEntityClient entityClient, + @Nonnull final EntitySearchService searchService, + @Nonnull final Authentication systemAuthentication ) { _entityRegistry = entityRegistry; - _entityService = entityService; + _entityClient = entityClient; _searchService = searchService; + _systemAuthentication = systemAuthentication; } @Value("${siblings.enabled:false}") @@ -123,8 +132,7 @@ private void handleEntityKeyEvent(DatasetUrn datasetUrn) { entitiesWithYouAsSiblingFilter, null, 0, - 10, - null); + 10); // we have a match of an entity with you as a sibling, associate yourself back searchResult.getEntities().forEach(entity -> { @@ -146,21 +154,12 @@ private void handleDbtDatasetEvent(MetadataChangeLog event, DatasetUrn datasetUr if (event.getAspectName().equals(UPSTREAM_LINEAGE_ASPECT_NAME)) { upstreamLineage = getUpstreamLineageFromEvent(event); - subTypesAspectOfEntity = - (SubTypes) _entityService.getLatestAspect( - datasetUrn, - SUB_TYPES_ASPECT_NAME - ); - + subTypesAspectOfEntity = getSubtypesFromEntityClient(datasetUrn); } if (event.getAspectName().equals(SUB_TYPES_ASPECT_NAME)) { subTypesAspectOfEntity = getSubtypesFromEvent(event); - upstreamLineage = - (UpstreamLineage) _entityService.getLatestAspect( - datasetUrn, - UPSTREAM_LINEAGE_ASPECT_NAME - ); + upstreamLineage = getUpstreamLineageFromEntityClient(datasetUrn); } if ( @@ -195,10 +194,8 @@ private void handleSourceDatasetEvent(MetadataChangeLog event, DatasetUrn source } private 
void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { - Siblings existingDbtSiblingAspect = - (Siblings) _entityService.getLatestAspect(dbtUrn, SIBLINGS_ASPECT_NAME); - Siblings existingSourceSiblingAspect = - (Siblings) _entityService.getLatestAspect(sourceUrn, SIBLINGS_ASPECT_NAME); + Siblings existingDbtSiblingAspect = getSiblingsFromEntityClient(dbtUrn); + Siblings existingSourceSiblingAspect = getSiblingsFromEntityClient(sourceUrn); log.info("Associating {} and {} as siblings.", dbtUrn.toString(), sourceUrn.toString()); @@ -228,7 +225,12 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { dbtSiblingProposal.setChangeType(ChangeType.UPSERT); dbtSiblingProposal.setEntityUrn(dbtUrn); - _entityService.ingestProposal(dbtSiblingProposal, auditStamp); + try { + _entityClient.ingestProposal(dbtSiblingProposal, _systemAuthentication); + } catch (RemoteInvocationException e) { + log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + throw new RuntimeException("Error ingesting sibling proposal. Skipping processing.", e); + } // set dbt as a sibling of source @@ -245,7 +247,14 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { // clean up any references to stale siblings that have been deleted List filteredNewSiblingsArray = - newSiblingsUrnArray.stream().filter(urn -> _entityService.exists(urn)).collect(Collectors.toList()); + newSiblingsUrnArray.stream().filter(urn -> { + try { + return _entityClient.exists(urn, _systemAuthentication); + } catch (RemoteInvocationException e) { + log.error("Error while checking existence of {}: {}", urn.toString(), e.toString()); + throw new RuntimeException("Error checking existence. 
Skipping processing.", e); + } + }).collect(Collectors.toList()); sourceSiblingAspect.setSiblings(new UrnArray(filteredNewSiblingsArray)); sourceSiblingAspect.setPrimary(false); @@ -259,7 +268,12 @@ private void setSiblingsAndSoftDeleteSibling(Urn dbtUrn, Urn sourceUrn) { sourceSiblingProposal.setChangeType(ChangeType.UPSERT); sourceSiblingProposal.setEntityUrn(sourceUrn); - _entityService.ingestProposal(sourceSiblingProposal, auditStamp); + try { + _entityClient.ingestProposal(sourceSiblingProposal, _systemAuthentication); + } catch (RemoteInvocationException e) { + log.error("Error while associating {} with {}: {}", dbtUrn.toString(), sourceUrn.toString(), e.toString()); + throw new RuntimeException("Error ingesting sibling proposal. Skipping processing.", e); + } } @@ -362,4 +376,67 @@ private Filter createFilterForEntitiesWithYouAsSibling( return filter; } + private SubTypes getSubtypesFromEntityClient( + final Urn urn + ) { + try { + EntityResponse entityResponse = _entityClient.getV2( + DATASET_ENTITY_NAME, + urn, + ImmutableSet.of(SUB_TYPES_ASPECT_NAME), + _systemAuthentication + ); + + if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SUB_TYPES_ASPECT_NAME)) { + return new SubTypes(entityResponse.getAspects().get(Constants.SUB_TYPES_ASPECT_NAME).getValue().data()); + } else { + return null; + } + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException("Failed to retrieve Subtypes", e); + } + } + + private UpstreamLineage getUpstreamLineageFromEntityClient( + final Urn urn + ) { + try { + EntityResponse entityResponse = _entityClient.getV2( + DATASET_ENTITY_NAME, + urn, + ImmutableSet.of(UPSTREAM_LINEAGE_ASPECT_NAME), + _systemAuthentication + ); + + if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { + return new 
UpstreamLineage(entityResponse.getAspects().get(Constants.UPSTREAM_LINEAGE_ASPECT_NAME).getValue().data()); + } else { + return null; + } + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException("Failed to retrieve UpstreamLineage", e); + } + } + + private Siblings getSiblingsFromEntityClient( + final Urn urn + ) { + try { + EntityResponse entityResponse = _entityClient.getV2( + DATASET_ENTITY_NAME, + urn, + ImmutableSet.of(SIBLINGS_ASPECT_NAME), + _systemAuthentication + ); + + if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects().containsKey(Constants.SIBLINGS_ASPECT_NAME)) { + return new Siblings(entityResponse.getAspects().get(Constants.SIBLINGS_ASPECT_NAME).getValue().data()); + } else { + return null; + } + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException("Failed to retrieve Siblings", e); + } + } + } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java index 190cd922ee15ea..3ab8175115646d 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHookTest.java @@ -1,7 +1,8 @@ package com.linkedin.metadata.kafka.hook.siblings; +import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.FabricType; import com.linkedin.common.Siblings; import com.linkedin.common.SubTypes; @@ -14,15 +15,19 @@ import com.linkedin.dataset.Upstream; import com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; +import 
com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.RestliEntityClient; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; -import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; @@ -36,16 +41,18 @@ public class SiblingAssociationHookTest { private SiblingAssociationHook _siblingAssociationHook; - EntityService _mockEntityService; - SearchService _mockSearchService; + RestliEntityClient _mockEntityClient; + EntitySearchService _mockSearchService; + Authentication _mockAuthentication; @BeforeMethod public void setupTest() { EntityRegistry registry = new ConfigEntityRegistry( SiblingAssociationHookTest.class.getClassLoader().getResourceAsStream("test-entity-registry-siblings.yml")); - _mockEntityService = Mockito.mock(EntityService.class); - _mockSearchService = Mockito.mock(SearchService.class); - _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityService, _mockSearchService); + _mockEntityClient = Mockito.mock(RestliEntityClient.class); + _mockSearchService = Mockito.mock(EntitySearchService.class); + _mockAuthentication = Mockito.mock(Authentication.class); + _siblingAssociationHook = new SiblingAssociationHook(registry, _mockEntityClient, _mockSearchService, _mockAuthentication); _siblingAssociationHook.setEnabled(true); } @@ 
-53,14 +60,21 @@ public void setupTest() { public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { SubTypes mockSourceSubtypesAspect = new SubTypes(); mockSourceSubtypesAspect.setTypeNames(new StringArray(ImmutableList.of("source"))); + EnvelopedAspectMap mockResponseMap = new EnvelopedAspectMap(); + mockResponseMap.put(SUB_TYPES_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(mockSourceSubtypesAspect.data()))); + EntityResponse mockResponse = new EntityResponse(); + mockResponse.setAspects(mockResponseMap); + + Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); - Mockito.when(_mockEntityService.exists(Mockito.any())).thenReturn(true); Mockito.when( - _mockEntityService.getLatestAspect( + _mockEntityClient.getV2( + DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - SUB_TYPES_ASPECT_NAME - )).thenReturn(mockSourceSubtypesAspect); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME), + _mockAuthentication + )).thenReturn(mockResponse); MetadataChangeLog event = new MetadataChangeLog(); event.setEntityType(DATASET_ENTITY_NAME); @@ -90,9 +104,9 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( Mockito.eq(proposal), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); final Siblings sourceSiblingsAspect = new Siblings() @@ -106,9 +120,9 @@ public void testInvokeWhenThereIsAPairWithDbtSourceNode() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, 
Mockito.times(1)).ingestProposal( Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); } @@ -117,13 +131,23 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { SubTypes mockSourceSubtypesAspect = new SubTypes(); mockSourceSubtypesAspect.setTypeNames(new StringArray(ImmutableList.of("model"))); - Mockito.when(_mockEntityService.exists(Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + + EnvelopedAspectMap mockResponseMap = new EnvelopedAspectMap(); + mockResponseMap.put(SUB_TYPES_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(mockSourceSubtypesAspect.data()))); + EntityResponse mockResponse = new EntityResponse(); + mockResponse.setAspects(mockResponseMap); + + Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); + Mockito.when( - _mockEntityService.getLatestAspect( + _mockEntityClient.getV2( + DATASET_ENTITY_NAME, Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:dbt,my-proj.jaffle_shop.customers,PROD)"), - SUB_TYPES_ASPECT_NAME - )).thenReturn(mockSourceSubtypesAspect); + ImmutableSet.of(SUB_TYPES_ASPECT_NAME), + _mockAuthentication + )).thenReturn(mockResponse); MetadataChangeLog event = new MetadataChangeLog(); event.setEntityType(DATASET_ENTITY_NAME); @@ -153,15 +177,15 @@ public void testInvokeWhenThereIsNoPairWithDbtModel() throws Exception { proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(0)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(0)).ingestProposal( Mockito.eq(proposal), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); } @Test public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Exception { - Mockito.when(_mockEntityService.exists(Mockito.any())).thenReturn(true); + 
Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); MetadataChangeLog event = new MetadataChangeLog(); event.setEntityType(DATASET_ENTITY_NAME); @@ -191,9 +215,9 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( Mockito.eq(proposal), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); final Siblings sourceSiblingsAspect = new Siblings() @@ -207,15 +231,15 @@ public void testInvokeWhenThereIsAPairWithBigqueryDownstreamNode() throws Except proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); } @Test public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { - Mockito.when(_mockEntityService.exists(Mockito.any())).thenReturn(true); + Mockito.when(_mockEntityClient.exists(Mockito.any(), Mockito.any())).thenReturn(true); SearchResult returnSearchResult = new SearchResult(); SearchEntityArray returnEntityArray = new SearchEntityArray(); @@ -229,7 +253,7 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { Mockito.when( _mockSearchService.search( - anyString(), anyString(), any(), any(), anyInt(), anyInt(), any() + anyString(), anyString(), any(), any(), anyInt(), anyInt() )).thenReturn(returnSearchResult); MetadataChangeLog event = new MetadataChangeLog(); @@ -256,9 +280,9 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { 
proposal.setAspect(GenericRecordUtils.serializeAspect(dbtSiblingsAspect)); proposal.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( Mockito.eq(proposal), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); final Siblings sourceSiblingsAspect = new Siblings() @@ -272,9 +296,9 @@ public void testInvokeWhenThereIsAKeyBeingReingested() throws Exception { proposal2.setAspect(GenericRecordUtils.serializeAspect(sourceSiblingsAspect)); proposal2.setChangeType(ChangeType.UPSERT); - Mockito.verify(_mockEntityService, Mockito.times(1)).ingestProposal( + Mockito.verify(_mockEntityClient, Mockito.times(1)).ingestProposal( Mockito.eq(proposal2), - Mockito.any(AuditStamp.class) + Mockito.eq(_mockAuthentication) ); } } diff --git a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json index 07273487808c9c..9116dd03c8733b 100644 --- a/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json +++ b/metadata-service/restli-api/src/main/idl/com.linkedin.entity.entities.restspec.json @@ -114,6 +114,13 @@ "optional" : true } ], "returns" : "com.linkedin.metadata.run.DeleteReferencesResponse" + }, { + "name" : "exists", + "parameters" : [ { + "name" : "urn", + "type" : "string" + } ], + "returns" : "boolean" }, { "name" : "filter", "parameters" : [ { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index a1cd7337889ef9..3a7dee437cfc61 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -5679,6 +5679,13 @@ "optional" : 
true } ], "returns" : "com.linkedin.metadata.run.DeleteReferencesResponse" + }, { + "name" : "exists", + "parameters" : [ { + "name" : "urn", + "type" : "string" + } ], + "returns" : "boolean" }, { "name" : "filter", "parameters" : [ { diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index 734bf2d6aadae7..1ebe7fe3674b14 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -296,4 +296,6 @@ public DataMap getRawAspect(@Nonnull String urn, @Nonnull String aspect, @Nonnul public void producePlatformEvent(@Nonnull String name, @Nullable String key, @Nonnull PlatformEvent event, @Nonnull Authentication authentication) throws Exception; + + Boolean exists(Urn urn, @Nonnull Authentication authentication) throws RemoteInvocationException; } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java index d3587820a79359..d2ca0906569c8a 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/JavaEntityClient.java @@ -463,4 +463,9 @@ public void producePlatformEvent( @Nonnull Authentication authentication) throws Exception { _eventProducer.producePlatformEvent(name, key, event); } + + @Override + public Boolean exists(Urn urn, @Nonnull Authentication authentication) throws RemoteInvocationException { + return _entityService.exists(urn); + } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java 
b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 3f301a020c274b..d580d4f2392da1 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -20,6 +20,7 @@ import com.linkedin.entity.EntitiesDoBrowseRequestBuilder; import com.linkedin.entity.EntitiesDoDeleteReferencesRequestBuilder; import com.linkedin.entity.EntitiesDoDeleteRequestBuilder; +import com.linkedin.entity.EntitiesDoExistsRequestBuilder; import com.linkedin.entity.EntitiesDoFilterRequestBuilder; import com.linkedin.entity.EntitiesDoGetBrowsePathsRequestBuilder; import com.linkedin.entity.EntitiesDoIngestRequestBuilder; @@ -662,4 +663,11 @@ public void producePlatformEvent(@Nonnull String name, @Nullable String key, @No } sendClientRequest(requestBuilder, authentication); } + + @Override + public Boolean exists(Urn urn, @Nonnull Authentication authentication) throws RemoteInvocationException { + final EntitiesDoExistsRequestBuilder requestBuilder = + ENTITIES_REQUEST_BUILDERS.actionExists().urnParam(urn.toString()); + return sendClientRequest(requestBuilder, authentication).getEntity(); + } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index ea6dc4f76551a6..c0f2e2200ba49d 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -89,6 +89,7 @@ public class EntityResource extends CollectionResourceTaskTemplate filter(@ActionParam(PARAM_ENTITY) @Nonnull String enti return RestliUtil.toTask(() -> _entitySearchService.filter(entityName, 
filter, sortCriterion, start, count), MetricRegistry.name(this.getClass(), "search")); } + + @Action(name = ACTION_EXISTS) + @Nonnull + @WithSpan + public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr) throws URISyntaxException { + log.info("EXISTS for {}", urnStr); + Urn urn = Urn.createFromString(urnStr); + return RestliUtil.toTask(() -> _entityService.exists(urn), + MetricRegistry.name(this.getClass(), "exists")); + } } diff --git a/smoke-test/tests/cypress/example_siblings_to_datahub_rest.yml b/smoke-test/tests/cypress/example_siblings_to_datahub_rest.yml new file mode 100644 index 00000000000000..89b259a9ea3c3c --- /dev/null +++ b/smoke-test/tests/cypress/example_siblings_to_datahub_rest.yml @@ -0,0 +1,11 @@ +# see https://datahubproject.io/docs/generated/ingestion/sources/file for complete documentation +source: + type: "file" + config: + filename: "./cypress_dbt_data.json" + +# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation +sink: + type: "datahub-rest" + config: + server: "http://localhost:8080"