From f2de1d9adec0efe3e5bd8db5817a73a4e436d06e Mon Sep 17 00:00:00 2001 From: leifker Date: Thu, 17 Feb 2022 19:22:11 -0600 Subject: [PATCH] feat(ingestion): add java protobuf schema ingestion Adding additional submodule to allow ingestion of compiled protobuf binaries. --- build.gradle | 29 +- docs-website/sidebars.js | 1 + .../java/datahub-protobuf/README.md | 221 ++++++++++ .../java/datahub-protobuf/build.gradle | 53 +++ .../google/protobuf/ExtensionRegistry.java | 388 ++++++++++++++++++ .../datahub/protobuf/ProtobufDataset.java | 236 +++++++++++ .../java/datahub/protobuf/ProtobufUtils.java | 101 +++++ .../datahub/protobuf/model/FieldTypeEdge.java | 55 +++ .../protobuf/model/ProtobufElement.java | 32 ++ .../datahub/protobuf/model/ProtobufEnum.java | 87 ++++ .../datahub/protobuf/model/ProtobufField.java | 239 +++++++++++ .../datahub/protobuf/model/ProtobufGraph.java | 351 ++++++++++++++++ .../protobuf/model/ProtobufMessage.java | 122 ++++++ .../protobuf/model/ProtobufOneOfField.java | 63 +++ .../visitors/ProtobufExtensionUtil.java | 109 +++++ .../visitors/ProtobufModelVisitor.java | 24 ++ .../protobuf/visitors/VisitContext.java | 58 +++ .../visitors/dataset/DatasetVisitor.java | 79 ++++ .../visitors/dataset/DescriptionVisitor.java | 14 + .../dataset/InstitutionalMemoryVisitor.java | 140 +++++++ .../dataset/KafkaTopicPropertyVisitor.java | 31 ++ .../ProtobufExtensionPropertyVisitor.java | 43 ++ .../ProtobufExtensionTagAssocVisitor.java | 20 + .../ProtobufExtensionTermAssocVisitor.java | 17 + .../field/ProtobufExtensionFieldVisitor.java | 91 ++++ .../visitors/field/SchemaFieldVisitor.java | 25 ++ .../tags/ProtobufExtensionTagVisitor.java | 40 ++ .../src/main/resources/protobuf/meta.proto | 38 ++ .../protobuf/meta_field_options.proto | 30 ++ .../protobuf/meta_message_options.proto | 29 ++ .../datahub/protobuf/ProtobufDatasetTest.java | 382 +++++++++++++++++ .../datahub/protobuf/ProtobufUtilsTest.java | 37 ++ .../java/datahub/protobuf/TestFixtures.java | 78 ++++ 
.../protobuf/model/ProtobufEnumTest.java | 80 ++++ .../protobuf/model/ProtobufFieldTest.java | 230 +++++++++++ .../protobuf/model/ProtobufGraphTest.java | 71 ++++ .../protobuf/model/ProtobufMessageTest.java | 180 ++++++++ .../model/ProtobufOneOfFieldTest.java | 121 ++++++ .../visitors/dataset/DatasetVisitorTest.java | 56 +++ .../dataset/DescriptionVisitorTest.java | 27 ++ .../InstitutionalMemoryVisitorTest.java | 68 +++ .../KafkaTopicPropertyVisitorTest.java | 36 ++ .../ProtobufExtensionPropertyVisitorTest.java | 46 +++ ...ProtobufExtensionTermAssocVisitorTest.java | 38 ++ .../ProtobufExtensionFieldVisitorTest.java | 218 ++++++++++ .../field/SchemaFieldVisitorTest.java | 68 +++ .../tag/ProtobufExtensionTagVisitorTest.java | 59 +++ .../extended_protobuf/messageA.proto | 28 ++ .../extended_protobuf/messageA.protoc | Bin 0 -> 56167 bytes .../extended_protobuf/messageB.proto | 33 ++ .../extended_protobuf/messageB.protoc | Bin 0 -> 56430 bytes .../extended_protobuf/meta/meta.proto | 38 ++ .../meta/meta_field_options.proto | 28 ++ .../meta/meta_message_options.proto | 28 ++ .../test/resources/protobuf/messageA.proto | 46 +++ .../test/resources/protobuf/messageA.protoc | Bin 0 -> 1781 bytes .../test/resources/protobuf/messageB.proto | 17 + .../test/resources/protobuf/messageB.protoc | Bin 0 -> 7030 bytes .../test/resources/protobuf/messageC.proto | 14 + .../test/resources/protobuf/messageC.protoc | Bin 0 -> 506 bytes .../test/resources/protobuf/messageC2.proto | 22 + .../test/resources/protobuf/messageC2.protoc | Bin 0 -> 815 bytes .../resources/protobuf/messageEmpty.proto | 6 + .../resources/protobuf/messageEmpty.protoc | Bin 0 -> 79 bytes settings.gradle | 1 + 65 files changed, 4815 insertions(+), 7 deletions(-) create mode 100644 metadata-integration/java/datahub-protobuf/README.md create mode 100644 metadata-integration/java/datahub-protobuf/build.gradle create mode 100644 
metadata-integration/java/datahub-protobuf/src/main/java/com/google/protobuf/ExtensionRegistry.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufDataset.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/FieldTypeEdge.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufElement.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufEnum.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufGraph.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufMessage.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufOneOfField.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufExtensionUtil.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufModelVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/VisitContext.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DatasetVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DescriptionVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitor.java create mode 100644 
metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTagAssocVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/SchemaFieldVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/tags/ProtobufExtensionTagVisitor.java create mode 100644 metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_field_options.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_message_options.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/TestFixtures.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java create mode 100644 
metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/ProtobufExtensionTagVisitorTest.java create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.proto create 
mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_field_options.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_message_options.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.protoc create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.proto create mode 100644 metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.protoc diff --git a/build.gradle b/build.gradle index 80dcdbe7f72ab6..17c3f05d26428f 100644 --- a/build.gradle +++ b/build.gradle @@ -78,6 +78,7 @@ project.ext.externalDependency = [ 'jerseyCore': 'org.glassfish.jersey.core:jersey-client:2.25.1', 'jerseyGuava': 'org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1', 'jettyJaas': 'org.eclipse.jetty:jetty-jaas:9.4.28.v20200408', + 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jsonSimple': 
'com.googlecode.json-simple:json-simple:1.1.1', 'junitJupiterApi': "org.junit.jupiter:junit-jupiter-api:$junitJupiterVersion", 'junitJupiterParams': "org.junit.jupiter:junit-jupiter-params:$junitJupiterVersion", @@ -110,6 +111,7 @@ project.ext.externalDependency = [ 'pac4j': 'org.pac4j:pac4j-oidc:3.6.0', 'playPac4j': 'org.pac4j:play-pac4j_2.11:7.0.1', 'postgresql': 'org.postgresql:postgresql:42.3.3', + 'protobuf': 'com.google.protobuf:protobuf-java:3.19.3', 'reflections': 'org.reflections:reflections:0.9.9', 'resilience4j': 'io.github.resilience4j:resilience4j-retry:1.7.1', 'rythmEngine': 'org.rythmengine:rythm-engine:1.3.0', @@ -194,14 +196,27 @@ subprojects { } } - tasks.withType(JavaCompile).configureEach { - javaCompiler = javaToolchains.compilerFor { - languageVersion = JavaLanguageVersion.of(8) + if (project.name != 'datahub-protobuf') { + tasks.withType(JavaCompile).configureEach { + javaCompiler = javaToolchains.compilerFor { + languageVersion = JavaLanguageVersion.of(8) + } } - } - tasks.withType(Test).configureEach { - javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(8) + tasks.withType(Test).configureEach { + javaLauncher = javaToolchains.launcherFor { + languageVersion = JavaLanguageVersion.of(8) + } + } + } else { + tasks.withType(JavaCompile).configureEach { + javaCompiler = javaToolchains.compilerFor { + languageVersion = JavaLanguageVersion.of(11) + } + } + tasks.withType(Test).configureEach { + javaLauncher = javaToolchains.launcherFor { + languageVersion = JavaLanguageVersion.of(11) + } } } diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 4f7c4821238efa..1f32518bbfd918 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -82,6 +82,7 @@ module.exports = { "metadata-ingestion/as-a-library", "metadata-integration/java/as-a-library", "metadata-ingestion/integration_docs/great-expectations", + "metadata-integration/java/datahub-protobuf/README", ], }, { diff --git 
a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md new file mode 100644 index 00000000000000..3a805091d36d58 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/README.md @@ -0,0 +1,221 @@ +# Protobuf Integration + +This module is designed to be used with the Java Emitter. The input is a compiled protobuf binary (a `*.protoc` file) +and, optionally, the corresponding `*.proto` source code. In addition, you can supply the root message in cases +where a single protobuf source file includes multiple non-nested messages. + +## Supported Features + +The following protobuf features are supported and are translated into descriptions, tags, properties and terms on a +dataset. + + * C++/C style code comments on Messages and Fields + * Nested Types + * Scalar Values + * Well Known Type Wrappers (e.g. DoubleValue, FloatValue, StringValue) + * Enumerations + * Oneof + * Maps + * Extensions + * Web links + * Parsing of GitHub team names and Slack channel references + +## Usage + +### Protobuf Compile Options + +In order to support parsing comments, the following option flags should be used during `protoc` compilation. + + protoc --include_imports --include_source_info --descriptor_set_out=MyProto.protoc MyProto.proto + +### Code Example + +Given an input stream of the `protoc` binary and the emitter, the minimal code is shown below. 
+ +```java +import com.linkedin.common.FabricType; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.CorpuserUrn; +import datahub.client.rest.RestEmitter; +import datahub.protobuf.ProtobufDataset; + +RestEmitter emitter; +InputStream protocInputStream; + +AuditStamp auditStamp = new AuditStamp() + .setTime(System.currentTimeMillis()) + .setActor(new CorpuserUrn("datahub")); + +ProtobufDataset dataset = ProtobufDataset.builder() + .setDataPlatformUrn(new DataPlatformUrn("kafka")) + .setProtocIn(protocInputStream) + .setAuditStamp(auditStamp) + .setFabricType(FabricType.DEV) + .build(); + +dataset.getAllMetadataChangeProposals().forEach(mcpw -> emitter.emit(mcpw, null).get()); +``` + +Additionally, the raw protobuf source can be included as well as information to allow parsing of additional +references to GitHub and Slack in the source code comments. + +```java +ProtobufDataset dataset = ProtobufDataset.builder() + .setDataPlatformUrn(new DataPlatformUrn("kafka")) + .setSchema(" my raw protobuf schema ") + .setProtocIn(protocInputStream) + .setAuditStamp(auditStamp) + .setFabricType(FabricType.DEV) + .setGithubOrganization("myOrg") + .setSlackTeamId("SLACK123") + .build(); +``` + +### Protobuf Extensions + +In order to extract even more metadata from the protobuf schema we can extend the FieldOptions and MessageOptions +to be able to annotate Messages and Fields with arbitrary information. This information can then be emitted +as DataHub primary key information, tags, glossary terms or properties on the dataset. + +An annotated protobuf schema would look like the following, except for the `is_primary_key` all +annotations are configurable for individual needs. + +*Note*: Extending FieldOptions and MessageOptions does not change the messages themselves. The metadata is not included +in messages being sent over the wire. 
+ +```protobuf +syntax = "proto3"; +import "meta.proto"; + +message Department { + int32 id = 1 [(meta.fld.is_primary_key) = true]; + string name = 2; +} + +message Person { + option(meta.msg.type) = ENTITY; + option(meta.msg.classification_enum) = HighlyConfidential; + option(meta.msg.team) = "TeamB"; + option(meta.msg.bool_feature) = true; + option(meta.msg.alert_channel) = "#alerts"; + + string name = 1 [(meta.fld.classification) = "Classification.HighlyConfidential"]; + + int32 id = 2 + [(meta.fld.is_primary_key) = true]; + + string email = 3 + [(meta.fld.classification_enum) = Confidential]; + + Department dept = 4; + + string test_coverage = 5 + [(meta.fld.product_type_bool) = true, (meta.fld.product_type) = "my type", (meta.fld.product_type_enum) = EVENT]; +} +``` + +#### meta.proto + +In order to use the annotations above, create a proto file called `meta.proto`. Feel free to customize the kinds of +metadata and how it is emitted to DataHub for your use cases. + +```protobuf +syntax = "proto3"; +package meta; + +import "google/protobuf/descriptor.proto"; + +/* + This is assigned to metadata fields. It describes how the metadata field should be represented + in DataHub. This enum must be used in the `meta` package. Multiple can be used for the same + metadata annotation. This allows a single piece of information to be captured in DataHub + as a property, tag and/or term. + + Tags can be strings, enums, or booleans + Terms can be strings or enums + Properties should be strings + +*/ +enum DataHubMetadataType { + PROPERTY = 0; // Datahub Custom Property + TAG = 1; // Datahub Tag + TERM = 2; // Datahub Term +} + +/* + Example below: The following is not required for annotation processing. This is an example + of creating an annotation using an enum. 
+ */ + +enum MetaEnumExample { + UNKNOWN = 0; + ENTITY = 1; + EVENT = 2; +} + +// Assuming Glossary Term defined from bootstrap example +enum Classification { + HighlyConfidential = 0; + Confidential = 1; + Sensitive = 2; +} +``` + +#### FieldOptions + +Define possible annotations on fields and how they are exported to DataHub. + +```protobuf + +message fld { + extend google.protobuf.FieldOptions { + // Required: Mark option field with how to export to DataHub in one or more places. + repeated meta.DataHubMetadataType type = 6000; + + /* + Examples below: The following is not required for annotation processing. + */ + + // Set true if the field is a primary key. This works for any boolean with `primary_key` in it. + bool is_primary_key = 6010; + + // Extract classification field option as a Term, either works + string classification = 6001 [(meta.fld.type) = TERM]; + meta.Classification classification_enum = 6002 [(meta.fld.type) = TERM]; + + // Expose this option as a tag on the field. + string product_type = 70004 [(meta.fld.type) = TAG]; + bool product_type_bool = 70005 [(meta.fld.type) = TAG]; + meta.MetaEnumExample product_type_enum = 70006 [(meta.fld.type) = TAG]; + } +} +``` + +#### MessageOptions + +Define possible annotations on messages and how they are exported to DataHub. + +```protobuf + +message msg { + extend google.protobuf.MessageOptions { + /* + Examples below: The following is not required for annotation processing. + */ + + // Place the classification term at the Message/Dataset level, either string or enum is supported + string classification = 4000 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + meta.Classification classification_enum = 4001 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + + // Attach these Message/Dataset options as a tag and property. 
+ string product = 5001 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string project = 5002 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string team = 5003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + + string domain = 60003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + bool bool_feature = 60005 [(meta.fld.type) = TAG]; + string alert_channel = 60007 [(meta.fld.type) = PROPERTY]; + } +} +``` diff --git a/metadata-integration/java/datahub-protobuf/build.gradle b/metadata-integration/java/datahub-protobuf/build.gradle new file mode 100644 index 00000000000000..8a0c09d533e8bd --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/build.gradle @@ -0,0 +1,53 @@ +plugins { + id("com.palantir.git-version") apply false +} +apply plugin: 'java' +apply plugin: 'jacoco' + +afterEvaluate { + if (project.plugins.hasPlugin('java')) { + sourceCompatibility = 11 + targetCompatibility = 11 + } +} + +dependencies { + implementation project(':metadata-models') + implementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow') + + implementation externalDependency.protobuf + implementation externalDependency.jgrapht + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok + testImplementation externalDependency.junitJupiterApi + testRuntimeOnly externalDependency.junitJupiterEngine +} + +import java.nio.file.Path +task compileProtobuf { + doLast { + def basePath = Path.of("${projectDir}/src/test/resources") + [ + fileTree("${projectDir}/src/test/resources/protobuf") { include "*.proto" }, + fileTree("${projectDir}/src/test/resources/extended_protobuf") { include "*.proto" } + ].collectMany { it.collect() }.each { f -> + def input = basePath.relativize(Path.of(f.getAbsolutePath())) + println(input.toString() + " => " + input.toString().replace(".proto", ".protoc")) + exec { + workingDir 
"${projectDir}/src/test/resources" + commandLine 'protoc', '--proto_path=.', '--include_imports', '--include_source_info', + "--descriptor_set_out=${input.toString().replace(".proto", ".protoc")}", + input + } + } + } +} + +jacocoTestReport { + dependsOn test // tests are required to run before generating the report +} + +test { + useJUnit() + finalizedBy jacocoTestReport +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/com/google/protobuf/ExtensionRegistry.java b/metadata-integration/java/datahub-protobuf/src/main/java/com/google/protobuf/ExtensionRegistry.java new file mode 100644 index 00000000000000..e6f93eb1a4f0cd --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/com/google/protobuf/ExtensionRegistry.java @@ -0,0 +1,388 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import com.google.protobuf.Descriptors.Descriptor; +import com.google.protobuf.Descriptors.FieldDescriptor; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * A table of known extensions, searchable by name or field number. When parsing a protocol message + * that might have extensions, you must provide an {@code ExtensionRegistry} in which you have + * registered any extensions that you want to be able to parse. Otherwise, those extensions will + * just be treated like unknown fields. + * + *

For example, if you had the {@code .proto} file: + * + *

+ * option java_class = "MyProto";
+ *
+ * message Foo {
+ *   extensions 1000 to max;
+ * }
+ *
+ * extend Foo {
+ *   optional int32 bar;
+ * }
+ * 
+ * + * Then you might write code like: + * + *
+ * ExtensionRegistry registry = ExtensionRegistry.newInstance();
+ * registry.add(MyProto.bar);
+ * MyProto.Foo message = MyProto.Foo.parseFrom(input, registry);
+ * 
+ * + *

Background: + * + *

You might wonder why this is necessary. Two alternatives might come to mind. First, you might + * imagine a system where generated extensions are automatically registered when their containing + * classes are loaded. This is a popular technique, but is bad design; among other things, it + * creates a situation where behavior can change depending on what classes happen to be loaded. It + * also introduces a security vulnerability, because an unprivileged class could cause its code to + * be called unexpectedly from a privileged class by registering itself as an extension of the right + * type. + * + *

Another option you might consider is lazy parsing: do not parse an extension until it is first + * requested, at which point the caller must provide a type to use. This introduces a different set + * of problems. First, it would require a mutex lock any time an extension was accessed, which would + * be slow. Second, corrupt data would not be detected until first access, at which point it would + * be much harder to deal with it. Third, it could violate the expectation that message objects are + * immutable, since the type provided could be any arbitrary message class. An unprivileged user + * could take advantage of this to inject a mutable object into a message belonging to privileged + * code and create mischief. + * + * @author kenton@google.com Kenton Varda + */ +public class ExtensionRegistry extends ExtensionRegistryLite { + /** Construct a new, empty instance. */ + public static ExtensionRegistry newInstance() { + return new ExtensionRegistry(); + } + + /** Get the unmodifiable singleton empty instance. */ + public static ExtensionRegistry getEmptyRegistry() { + return EMPTY_REGISTRY; + } + + + /** Returns an unmodifiable view of the registry. */ + @Override + public ExtensionRegistry getUnmodifiable() { + return new ExtensionRegistry(this); + } + + /** A (Descriptor, Message) pair, returned by lookup methods. */ + public static final class ExtensionInfo { + /** The extension's descriptor. */ + public final FieldDescriptor descriptor; + + /** + * A default instance of the extension's type, if it has a message type. Otherwise, {@code + * null}. + */ + public final Message defaultInstance; + + private ExtensionInfo(final FieldDescriptor descriptor) { + this.descriptor = descriptor; + defaultInstance = null; + } + + private ExtensionInfo(final FieldDescriptor descriptor, final Message defaultInstance) { + this.descriptor = descriptor; + this.defaultInstance = defaultInstance; + } + } + + /** Deprecated. 
Use {@link #findImmutableExtensionByName(String)} instead. */ + @Deprecated + public ExtensionInfo findExtensionByName(final String fullName) { + return findImmutableExtensionByName(fullName); + } + + /** + * Find an extension for immutable APIs by fully-qualified field name, in the proto namespace. + * i.e. {@code result.descriptor.getFullName()} will match {@code fullName} if a match is found. + * + * @return Information about the extension if found, or {@code null} otherwise. + */ + public ExtensionInfo findImmutableExtensionByName(final String fullName) { + return immutableExtensionsByName.get(fullName); + } + + /** + * Find an extension for mutable APIs by fully-qualified field name, in the proto namespace. i.e. + * {@code result.descriptor.getFullName()} will match {@code fullName} if a match is found. + * + * @return Information about the extension if found, or {@code null} otherwise. + */ + public ExtensionInfo findMutableExtensionByName(final String fullName) { + return mutableExtensionsByName.get(fullName); + } + + /** Deprecated. Use {@link #findImmutableExtensionByNumber(Descriptors.Descriptor, int)} instead; this method only delegates to it. */ + @Deprecated + public ExtensionInfo findExtensionByNumber( + final Descriptor containingType, final int fieldNumber) { + return findImmutableExtensionByNumber(containingType, fieldNumber); + } + + /** + * Find an extension by containing type and field number for immutable APIs. + * + * @return Information about the extension if found, or {@code null} otherwise. + */ + public ExtensionInfo findImmutableExtensionByNumber( + final Descriptor containingType, final int fieldNumber) { + return immutableExtensionsByNumber.get(new DescriptorIntPair(containingType, fieldNumber)); + } + + /** + * Find an extension by containing type and field number for mutable APIs. + * + * @return Information about the extension if found, or {@code null} otherwise. 
+ */ + public ExtensionInfo findMutableExtensionByNumber( + final Descriptor containingType, final int fieldNumber) { + return mutableExtensionsByNumber.get(new DescriptorIntPair(containingType, fieldNumber)); + } + + /** + * Find all extensions for mutable APIs by fully-qualified name of extended class. Note that this + * method is more computationally expensive than getting a single extension by name or number. + * + * @return Information about the extensions found; an empty set (never {@code null}) if there are none. + */ + public Set getAllMutableExtensionsByExtendedType(final String fullName) { + HashSet extensions = new HashSet(); + for (DescriptorIntPair pair : mutableExtensionsByNumber.keySet()) { + if (pair.descriptor.getFullName().equals(fullName)) { + extensions.add(mutableExtensionsByNumber.get(pair)); + } + } + return extensions; + } + + /** + * Find all extensions for immutable APIs by fully-qualified name of extended class. Note that + * this method is more computationally expensive than getting a single extension by name or + * number. + * + * @return Information about the extensions found; an empty set (never {@code null}) if there are none. + */ + public Set getAllImmutableExtensionsByExtendedType(final String fullName) { + HashSet extensions = new HashSet(); + for (DescriptorIntPair pair : immutableExtensionsByNumber.keySet()) { + if (pair.descriptor.getFullName().equals(fullName)) { + extensions.add(immutableExtensionsByNumber.get(pair)); + } + } + return extensions; + } + + /** Add an extension from a generated file to the registry. Extensions whose type is neither IMMUTABLE nor MUTABLE are silently ignored. */ + public void add(final Extension extension) { + if (extension.getExtensionType() != Extension.ExtensionType.IMMUTABLE + && extension.getExtensionType() != Extension.ExtensionType.MUTABLE) { + // do not support other extension types. ignore + return; + } + add(newExtensionInfo(extension), extension.getExtensionType()); + } + + /** Add an extension from a generated file to the registry. 
/**
 * Adds a non-message-type extension to the registry by descriptor.
 *
 * <p>The same {@code ExtensionInfo} (with no default instance) is registered twice: once in the
 * immutable-API tables and once in the mutable-API tables.
 *
 * @param type descriptor of the extension field; must not be of Java type {@code MESSAGE}
 * @throws IllegalArgumentException if {@code type} is a message-typed field, because those need
 *     a default instance (use the two-argument overload instead)
 */
public void add(final FieldDescriptor type) {
  if (type.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
    throw new IllegalArgumentException(
        "ExtensionRegistry.add() must be provided a default instance when "
            + "adding an embedded message extension.");
  }
  // One info object shared by both tables; scalar extensions carry no default instance.
  ExtensionInfo info = new ExtensionInfo(type, null);
  add(info, Extension.ExtensionType.IMMUTABLE);
  add(info, Extension.ExtensionType.MUTABLE);
}
/**
 * Registers {@code extension} in the name and number tables that belong to the given extension
 * type.
 *
 * <p>The extension is indexed by its fully-qualified field name and by the pair
 * (containing type, field number). Extension types other than {@code IMMUTABLE} and
 * {@code MUTABLE} are ignored without error.
 *
 * @param extension the extension to register; its descriptor must describe an extension field
 * @param extensionType selects the immutable or the mutable pair of tables
 * @throws IllegalArgumentException if the descriptor is for a regular (non-extension) field
 */
private void add(final ExtensionInfo extension, final Extension.ExtensionType extensionType) {
  if (!extension.descriptor.isExtension()) {
    throw new IllegalArgumentException(
        "ExtensionRegistry.add() was given a FieldDescriptor for a regular "
            + "(non-extension) field.");
  }

  // Pick the pair of tables to write to.
  Map extensionsByName;
  Map extensionsByNumber;
  switch (extensionType) {
    case IMMUTABLE:
      extensionsByName = immutableExtensionsByName;
      extensionsByNumber = immutableExtensionsByNumber;
      break;
    case MUTABLE:
      extensionsByName = mutableExtensionsByName;
      extensionsByNumber = mutableExtensionsByNumber;
      break;
    default:
      // Neither IMMUTABLE nor MUTABLE: unsupported extension type, silently ignored.
      return;
  }

  extensionsByName.put(extension.descriptor.getFullName(), extension);
  extensionsByNumber.put(
      new DescriptorIntPair(
          extension.descriptor.getContainingType(), extension.descriptor.getNumber()),
      extension);

  final FieldDescriptor field = extension.descriptor;
  if (field.getContainingType().getOptions().getMessageSetWireFormat()
      && field.getType() == FieldDescriptor.Type.MESSAGE
      && field.isOptional()
      && field.getExtensionScope() == field.getMessageType()) {
    // This is an extension of a MessageSet type defined within the extension
    // type's own scope. For backwards-compatibility, allow it to be looked
    // up by type name.
    extensionsByName.put(field.getMessageType().getFullName(), extension);
  }
}
+ * + * */ + private static final class DescriptorIntPair { + private final String fullName; + private final Descriptor descriptor; + private final int number; + + DescriptorIntPair(final Descriptor descriptor, final int number) { + this.descriptor = descriptor; + this.fullName = descriptor.getFullName(); + this.number = number; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + DescriptorIntPair that = (DescriptorIntPair) o; + + if (number != that.number) { + return false; + } + return fullName.equals(that.fullName); + } + + @Override + public int hashCode() { + int result = fullName.hashCode(); + result = 31 * result + number; + return result; + } + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufDataset.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufDataset.java new file mode 100644 index 00000000000000..905060d10c3d28 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufDataset.java @@ -0,0 +1,236 @@ +package datahub.protobuf; + +import com.google.protobuf.DescriptorProtos.FileDescriptorSet; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.FabricType; +import com.linkedin.common.Status; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.schema.KafkaSchema; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldArray; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.util.Pair; +import datahub.protobuf.model.ProtobufGraph; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import datahub.protobuf.visitors.dataset.DatasetVisitor; +import 
datahub.protobuf.visitors.dataset.InstitutionalMemoryVisitor; +import datahub.protobuf.visitors.dataset.KafkaTopicPropertyVisitor; +import datahub.protobuf.visitors.dataset.ProtobufExtensionPropertyVisitor; +import datahub.protobuf.visitors.dataset.ProtobufExtensionTagAssocVisitor; +import datahub.protobuf.visitors.dataset.ProtobufExtensionTermAssocVisitor; +import datahub.protobuf.visitors.field.SchemaFieldVisitor; +import datahub.event.MetadataChangeProposalWrapper; +import datahub.protobuf.visitors.field.ProtobufExtensionFieldVisitor; +import datahub.protobuf.visitors.tags.ProtobufExtensionTagVisitor; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.InputStream; +import java.util.Base64; +import java.util.Collection; +import java.util.Comparator; +import java.util.Optional; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +public class ProtobufDataset { + + public static ProtobufDataset.Builder builder() { + return new Builder(); + } + + public static class Builder { + private DataPlatformUrn dataPlatformUrn; + private FabricType fabricType; + private AuditStamp auditStamp; + private byte[] protocBytes; + private String messageName; + private String schema; + private String githubOrganization; + private String slackTeamId; + + public Builder setGithubOrganization(@Nullable String githubOrganization) { + this.githubOrganization = githubOrganization; + return this; + } + + public Builder setSlackTeamId(@Nullable String slackTeamId) { + this.slackTeamId = slackTeamId; + return this; + } + + public Builder setProtocIn(InputStream protocIn) throws IOException { + return setProtocBytes(protocIn.readAllBytes()); + } + + public Builder setDataPlatformUrn(@Nullable DataPlatformUrn dataPlatformUrn) { + this.dataPlatformUrn = dataPlatformUrn; + return this; + } + + public Builder setProtocBytes(byte[] protocBytes) { + this.protocBytes = protocBytes; + return this; + } + + public Builder 
setFabricType(FabricType fabricType) { + this.fabricType = fabricType; + return this; + } + + public Builder setAuditStamp(AuditStamp auditStamp) { + this.auditStamp = auditStamp; + return this; + } + + public Builder setMessageName(@Nullable String messageName) { + this.messageName = messageName; + return this; + } + + public Builder setSchema(@Nullable String schema) { + this.schema = schema; + return this; + } + + public ProtobufDataset build() throws IOException { + FileDescriptorSet fileSet = FileDescriptorSet.parseFrom(protocBytes); + + return new ProtobufDataset( + Optional.ofNullable(dataPlatformUrn).orElse(new DataPlatformUrn("kafka")), + new ProtobufGraph(fileSet, messageName), schema, auditStamp, fabricType) + .setMetadataChangeProposalVisitors( + List.of( + new ProtobufExtensionTagVisitor() + ) + ) + .setFieldVisitor(new ProtobufExtensionFieldVisitor()) + .setDatasetVisitor(DatasetVisitor.builder() + .protocBase64(Base64.getEncoder().encodeToString(protocBytes)) + .datasetPropertyVisitors( + List.of( + new KafkaTopicPropertyVisitor(), + new ProtobufExtensionPropertyVisitor() + ) + ) + .institutionalMemoryMetadataVisitors( + List.of( + new InstitutionalMemoryVisitor(slackTeamId, githubOrganization) + ) + ) + .tagAssociationVisitors( + List.of( + new ProtobufExtensionTagAssocVisitor() + ) + ) + .termAssociationVisitors( + List.of( + new ProtobufExtensionTermAssocVisitor() + ) + ) + .build() + ); + } + } + + private final DatasetUrn datasetUrn; + private final Optional schemaSource; + private final ProtobufGraph graph; + private final AuditStamp auditStamp; + private final VisitContext.VisitContextBuilder contextBuilder; + + private DatasetVisitor datasetVisitor; + private ProtobufModelVisitor> fieldVisitor; + private List>> mcpwVisitors; + + + public ProtobufDataset(DataPlatformUrn dataPlatformUrn, ProtobufGraph graph, String schema, AuditStamp auditStamp, FabricType fabricType) { + this.schemaSource = Optional.ofNullable(schema); + this.auditStamp = 
auditStamp; + this.graph = graph; + + // Default - non-protobuf extension + fieldVisitor = new SchemaFieldVisitor(); + mcpwVisitors = List.of(); + + this.datasetUrn = new DatasetUrn(dataPlatformUrn, this.graph.getFullName(), fabricType); + this.contextBuilder = VisitContext.builder().datasetUrn(this.datasetUrn).auditStamp(this.auditStamp); + } + + public ProtobufDataset setMetadataChangeProposalVisitors(List>> visitors) { + this.mcpwVisitors = visitors; + return this; + } + + public ProtobufDataset setDatasetVisitor(DatasetVisitor datasetVisitor) { + this.datasetVisitor = datasetVisitor; + return this; + } + + public ProtobufDataset setFieldVisitor(ProtobufModelVisitor> visitor) { + this.fieldVisitor = visitor; + return this; + } + + public ProtobufGraph getGraph() { + return graph; + } + + public AuditStamp getAuditStamp() { + return auditStamp; + } + + public DatasetUrn getDatasetUrn() { + return datasetUrn; + } + + public Stream>> getAllMetadataChangeProposals() { + return Stream.of(getVisitorMCPs(), getDatasetMCPs()); + } + + public List> getVisitorMCPs() { + return graph.accept(contextBuilder, mcpwVisitors).collect(Collectors.toList()); + } + + public List> getDatasetMCPs() { + return Stream.concat( + this.graph.accept(contextBuilder, List.of(datasetVisitor)), + Stream.of( + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn.toString(), ChangeType.UPSERT, + getSchemaMetadata(), "schemaMetadata"), + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn.toString(), ChangeType.UPSERT, + new Status().setRemoved(false), "status") + ) + ).collect(Collectors.toList()); + } + + public SchemaMetadata getSchemaMetadata() { + SchemaMetadata.PlatformSchema platformSchema = new SchemaMetadata.PlatformSchema(); + schemaSource.ifPresent(schemaStr -> platformSchema.setKafkaSchema(new KafkaSchema().setDocumentSchema(schemaStr))); + + List schemaFields = graph.accept(contextBuilder, List.of(fieldVisitor)) + 
.sorted(COMPARE_BY_ROOT_MESSAGE_FIELD_WEIGHT.thenComparing(COMPARE_BY_FIELD_PATH)) + .map(Pair::getFirst) + .collect(Collectors.toList()); + + return new SchemaMetadata() + .setSchemaName(graph.getFullName()) + .setPlatform(datasetUrn.getPlatformEntity()) + .setCreated(auditStamp) + .setLastModified(auditStamp) + .setVersion(graph.getMajorVersion()) + .setHash(graph.getHash()) + .setPlatformSchema(platformSchema) + .setFields(new SchemaFieldArray(schemaFields)); + } + + public static final Comparator> COMPARE_BY_ROOT_MESSAGE_FIELD_WEIGHT = Comparator.comparing(Pair::getSecond); + public static final Comparator> COMPARE_BY_FIELD_PATH = Comparator + .comparing(p -> p.getFirst().getFieldPath()); +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java new file mode 100644 index 00000000000000..347bf593dc0687 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java @@ -0,0 +1,101 @@ +package datahub.protobuf; + +import com.google.common.collect.ImmutableList; +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.google.protobuf.ExtensionRegistry; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ProtobufUtils { + private ProtobufUtils() { } + + public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Location location) { + String orig = Stream.concat(location.getLeadingDetachedCommentsList().stream(), + Stream.of(location.getLeadingComments(), location.getTrailingComments())) + .filter(Objects::nonNull) + .flatMap(line -> Arrays.stream(line.split("\n"))) + .map(line -> 
line.replaceFirst("^[*/ ]+", "")) + .collect(Collectors.joining("\n")) + .trim(); + + /* + * Sometimes DataHub doesn't like these strings. Not sure if its DataHub + * or protobuf issue: https://github.com/protocolbuffers/protobuf/issues/4691 + * + * We essentially smash utf8 chars to ascii here + */ + return new String(orig.getBytes(StandardCharsets.ISO_8859_1)); + } + + public static ExtensionRegistry buildRegistry(DescriptorProtos.FileDescriptorSet fileSet) { + ExtensionRegistry registry = ExtensionRegistry.newInstance(); + Map descriptorProtoMap = fileSet.getFileList().stream() + .collect(Collectors.toMap(DescriptorProtos.FileDescriptorProto::getName, Function.identity())); + Map descriptorCache = new HashMap<>(); + + fileSet.getFileList().forEach(fdp -> { + try { + Descriptors.FileDescriptor file = descriptorFromProto(fdp, descriptorProtoMap, descriptorCache); + Stream.concat(file.getExtensions().stream(), file.getMessageTypes().stream().flatMap(msg -> msg.getExtensions().stream())) + .forEach(ext -> addToRegistry(fdp, ext, registry)); + } catch (Descriptors.DescriptorValidationException e) { + e.printStackTrace(); + } + }); + return registry; + } + + private static void addToRegistry(DescriptorProtos.FileDescriptorProto fileDescriptorProto, + Descriptors.FieldDescriptor fieldDescriptor, ExtensionRegistry registry) { + if (fieldDescriptor.getJavaType() != Descriptors.FieldDescriptor.JavaType.MESSAGE) { + registry.add(fieldDescriptor); + } else { + fileDescriptorProto.getMessageTypeList().stream() + .filter(typ -> typ.getName().equals(fieldDescriptor.getMessageType().getName())) + .findFirst().ifPresent(messageType -> registry.add(fieldDescriptor, messageType.getDefaultInstanceForType())); + fieldDescriptor.getMessageType().getFields() + .stream().filter(Descriptors.FieldDescriptor::isExtension) + .forEach(f -> addToRegistry(fileDescriptorProto, f, registry)); + } + } + + /** + * Recursively constructs file descriptors for all dependencies of the supplied proto 
and returns + * a {@link Descriptors.FileDescriptor} for the supplied proto itself. For maximal efficiency, reuse the + * descriptorCache argument across calls. + */ + private static Descriptors.FileDescriptor descriptorFromProto( + DescriptorProtos.FileDescriptorProto descriptorProto, + Map descriptorProtoIndex, + Map descriptorCache) throws Descriptors.DescriptorValidationException { + // First, check the cache. + String descriptorName = descriptorProto.getName(); + if (descriptorCache.containsKey(descriptorName)) { + return descriptorCache.get(descriptorName); + } + + // Then, fetch all the required dependencies recursively. + ImmutableList.Builder dependencies = ImmutableList.builder(); + for (String dependencyName : descriptorProto.getDependencyList()) { + if (!descriptorProtoIndex.containsKey(dependencyName)) { + throw new IllegalArgumentException("Could not find dependency: " + dependencyName); + } + DescriptorProtos.FileDescriptorProto dependencyProto = descriptorProtoIndex.get(dependencyName); + dependencies.add(descriptorFromProto(dependencyProto, descriptorProtoIndex, descriptorCache)); + } + + // Finally, construct the actual descriptor. 
+ Descriptors.FileDescriptor[] empty = new Descriptors.FileDescriptor[0]; + return Descriptors.FileDescriptor.buildFrom(descriptorProto, dependencies.build().toArray(empty), false); + } + +} + diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/FieldTypeEdge.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/FieldTypeEdge.java new file mode 100644 index 00000000000000..7926ba0702762e --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/FieldTypeEdge.java @@ -0,0 +1,55 @@ +package datahub.protobuf.model; + +import lombok.Builder; +import lombok.Getter; +import org.jgrapht.graph.DefaultDirectedGraph; +import org.jgrapht.graph.DefaultEdge; + + +@Builder +@Getter +public class FieldTypeEdge extends DefaultEdge { + @Builder.Default + private final String type = ""; + @Builder.Default + private final boolean isMessageType = false; + private final transient ProtobufElement edgeSource; + private final transient ProtobufElement edgeTarget; + + public FieldTypeEdge inGraph(DefaultDirectedGraph g) { + g.addEdge(edgeSource, edgeTarget, this); + return this; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + FieldTypeEdge that = (FieldTypeEdge) o; + + if (isMessageType() != that.isMessageType()) { + return false; + } + if (!getType().equals(that.getType())) { + return false; + } + if (!getEdgeSource().equals(that.getEdgeSource())) { + return false; + } + return getEdgeTarget().equals(that.getEdgeTarget()); + } + + @Override + public int hashCode() { + int result = getType().hashCode(); + result = 31 * result + (isMessageType() ? 
1 : 0); + result = 31 * result + getEdgeSource().hashCode(); + result = 31 * result + getEdgeTarget().hashCode(); + return result; + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufElement.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufElement.java new file mode 100644 index 00000000000000..91c76fe16b73f4 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufElement.java @@ -0,0 +1,32 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.google.protobuf.DescriptorProtos.SourceCodeInfo; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + +import java.util.List; +import java.util.stream.Stream; + + +public interface ProtobufElement { + String name(); + String fullName(); + String nativeType(); + String comment(); + String fieldPathType(); + + FileDescriptorProto fileProto(); + DescriptorProto messageProto(); + + default Stream messageLocations() { + List fileLocations = fileProto().getSourceCodeInfo().getLocationList(); + return fileLocations.stream() + .filter(loc -> loc.getPathCount() > 1 + && loc.getPath(0) == FileDescriptorProto.MESSAGE_TYPE_FIELD_NUMBER + && messageProto() == fileProto().getMessageType(loc.getPath(1))); + } + + Stream accept(ProtobufModelVisitor v, VisitContext context); +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufEnum.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufEnum.java new file mode 100644 index 00000000000000..ff894112d0d516 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufEnum.java @@ -0,0 +1,87 @@ +package datahub.protobuf.model; + +import 
com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.EnumDescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.linkedin.schema.EnumType; +import com.linkedin.schema.SchemaFieldDataType; +import datahub.protobuf.ProtobufUtils; +import lombok.Builder; +import lombok.Getter; + +import java.util.stream.Collectors; + + +@Getter +public class ProtobufEnum extends ProtobufMessage { + + private final EnumDescriptorProto enumProto; + + @Builder(builderMethodName = "enumBuilder") + public ProtobufEnum(FileDescriptorProto fileProto, + DescriptorProto messageProto, + EnumDescriptorProto enumProto) { + super(messageProto, null, fileProto); + this.enumProto = enumProto; + } + + @Override + public String name() { + return enumProto.getName(); + } + + @Override + public String fieldPathType() { + return "[type=enum]"; + } + + @Override + public String nativeType() { + return "enum"; + } + + @Override + public SchemaFieldDataType schemaFieldDataType() throws IllegalStateException { + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType())); + } + + @Override + public String comment() { + return messageLocations() + .filter(loc -> loc.getPathCount() > 3 + && loc.getPath(2) == DescriptorProto.ENUM_TYPE_FIELD_NUMBER + && enumProto == messageProto().getEnumType(loc.getPath(3))) + .map(ProtobufUtils::collapseLocationComments) + .collect(Collectors.joining("\n")) + .trim(); + } + + @Override + public String toString() { + return String.format("ProtobufEnum[%s]", fullName()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + + ProtobufEnum that = (ProtobufEnum) o; + + return getEnumProto().equals(that.getEnumProto()); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * 
result + getEnumProto().hashCode(); + return result; + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java new file mode 100644 index 00000000000000..61b0572290606a --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java @@ -0,0 +1,239 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FieldDescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.google.protobuf.DescriptorProtos.OneofDescriptorProto; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.ArrayType; +import com.linkedin.schema.BooleanType; +import com.linkedin.schema.BytesType; +import com.linkedin.schema.EnumType; +import com.linkedin.schema.FixedType; +import com.linkedin.schema.NumberType; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.StringType; +import datahub.protobuf.ProtobufUtils; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; + +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +@Builder(toBuilder = true) +@Getter +@AllArgsConstructor +public class ProtobufField implements ProtobufElement { + + private final ProtobufMessage protobufMessage; + private final FieldDescriptorProto fieldProto; + private final String nativeType; + private final String fieldPathType; + private final Boolean isMessageType; + private final SchemaFieldDataType schemaFieldDataType; + + public OneofDescriptorProto oneOfProto() { + if (fieldProto.hasOneofIndex()) { + return 
protobufMessage.messageProto().getOneofDecl(fieldProto.getOneofIndex()); + } + return null; + } + + @Override + public FileDescriptorProto fileProto() { + return protobufMessage.fileProto(); + } + + @Override + public DescriptorProto messageProto() { + return protobufMessage.messageProto(); + } + + public String parentMessageName() { + return protobufMessage.fullName(); + } + + @Override + public String name() { + return fieldProto.getName(); + } + + @Override + public String fullName() { + return String.join(".", parentMessageName(), name()); + } + + @Override + public String nativeType() { + return Optional.ofNullable(nativeType).orElseGet(() -> { + if (fieldProto.getTypeName().isEmpty()) { + return fieldProto.getType().name().split("_")[1].toLowerCase(); + } else { + return fieldProto.getTypeName().replaceFirst("^[.]", ""); + } + }); + } + + @Override + public String fieldPathType() { + return Optional.ofNullable(fieldPathType).orElseGet(() -> { + final String pathType; + + switch (fieldProto.getType()) { + case TYPE_DOUBLE: + pathType = "double"; + break; + case TYPE_FLOAT: + pathType = "float"; + break; + case TYPE_SFIXED64: + case TYPE_FIXED64: + case TYPE_UINT64: + case TYPE_INT64: + case TYPE_SINT64: + pathType = "long"; + break; + case TYPE_FIXED32: + case TYPE_SFIXED32: + case TYPE_INT32: + case TYPE_UINT32: + case TYPE_SINT32: + pathType = "int"; + break; + case TYPE_BYTES: + pathType = "bytes"; + break; + case TYPE_ENUM: + pathType = "enum"; + break; + case TYPE_BOOL: + pathType = "boolean"; + break; + case TYPE_STRING: + pathType = "string"; + break; + case TYPE_GROUP: + case TYPE_MESSAGE: + pathType = nativeType().replace(".", "_"); + break; + default: + throw new IllegalStateException(String.format("Unexpected FieldDescriptorProto => FieldPathType %s", fieldProto.getType())); + } + + StringArray fieldPath = new StringArray(); + + if (schemaFieldDataType().getType().isArrayType()) { + fieldPath.add("[type=array]"); + } + + 
fieldPath.add(String.format("[type=%s]", pathType)); + + return String.join(".", fieldPath); + }); + } + + public boolean isMessage() { + return Optional.ofNullable(isMessageType).orElseGet(() -> + fieldProto.getType().equals(FieldDescriptorProto.Type.TYPE_MESSAGE)); + } + + public int sortWeight() { + return messageProto().getFieldList().indexOf(fieldProto) + 1; + } + + public SchemaFieldDataType schemaFieldDataType() throws IllegalStateException { + return Optional.ofNullable(schemaFieldDataType).orElseGet(() -> { + final SchemaFieldDataType.Type fieldType; + + switch (fieldProto.getType()) { + case TYPE_DOUBLE: + case TYPE_FLOAT: + case TYPE_INT64: + case TYPE_UINT64: + case TYPE_INT32: + case TYPE_UINT32: + case TYPE_SINT32: + case TYPE_SINT64: + fieldType = SchemaFieldDataType.Type.create(new NumberType()); + break; + case TYPE_GROUP: + case TYPE_MESSAGE: + fieldType = SchemaFieldDataType.Type.create(new RecordType()); + break; + case TYPE_BYTES: + fieldType = SchemaFieldDataType.Type.create(new BytesType()); + break; + case TYPE_ENUM: + fieldType = SchemaFieldDataType.Type.create(new EnumType()); + break; + case TYPE_BOOL: + fieldType = SchemaFieldDataType.Type.create(new BooleanType()); + break; + case TYPE_STRING: + fieldType = SchemaFieldDataType.Type.create(new StringType()); + break; + case TYPE_FIXED64: + case TYPE_FIXED32: + case TYPE_SFIXED32: + case TYPE_SFIXED64: + fieldType = SchemaFieldDataType.Type.create(new FixedType()); + break; + default: + throw new IllegalStateException(String.format("Unexpected FieldDescriptorProto => SchemaFieldDataType: %s", fieldProto.getType())); + } + + if (fieldProto.getLabel().equals(FieldDescriptorProto.Label.LABEL_REPEATED)) { + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType() + .setNestedType(new StringArray()))); + } + + return new SchemaFieldDataType().setType(fieldType); + }); + } + + @Override + public String comment() { + return messageLocations() + .filter(loc -> 
loc.getPathCount() > 3 + && loc.getPath(2) == DescriptorProto.FIELD_FIELD_NUMBER + && fieldProto == messageProto().getField(loc.getPath(3))) + .map(ProtobufUtils::collapseLocationComments) + .collect(Collectors.joining("\n")) + .trim(); + } + + @Override + public Stream accept(ProtobufModelVisitor visitor, VisitContext context) { + return visitor.visitField(this, context); + } + + @Override + public String toString() { + return String.format("ProtobufField[%s]", fullName()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + ProtobufElement that = (ProtobufElement) o; + + return fullName().equals(that.fullName()); + } + + @Override + public int hashCode() { + return fullName().hashCode(); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufGraph.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufGraph.java new file mode 100644 index 00000000000000..a3394a0a579fbc --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufGraph.java @@ -0,0 +1,351 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.ExtensionRegistry; +import com.google.protobuf.InvalidProtocolBufferException; +import datahub.protobuf.ProtobufUtils; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import org.jgrapht.GraphPath; +import org.jgrapht.alg.shortestpath.AllDirectedPaths; +import org.jgrapht.graph.DefaultDirectedGraph; + +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +public class ProtobufGraph extends DefaultDirectedGraph { + private final 
/**
 * Builds the element graph for a descriptor set and determines its root message.
 *
 * @param fileSet descriptor set to model
 * @param messageName full name of the root message, or {@code null} to autodetect the root
 *     among the messages of the last file in the set
 * @param flattenGoogleWrapped whether to run the {@code flattenGoogleWrapped()} pass after the
 *     graph is built
 * @throws InvalidProtocolBufferException if re-parsing the descriptor set fails
 * @throws IllegalArgumentException if the named root cannot be found or no root can be detected
 */
public ProtobufGraph(DescriptorProtos.FileDescriptorSet fileSet, String messageName, boolean flattenGoogleWrapped) throws InvalidProtocolBufferException {
  super(FieldTypeEdge.class);
  this.registry = ProtobufUtils.buildRegistry(fileSet);
  // Re-parse the same bytes with the extension registry; presumably so that custom options are
  // decoded as known extensions instead of unknown fields - TODO confirm.
  DescriptorProtos.FileDescriptorSet fileSetExtended = DescriptorProtos.FileDescriptorSet
      .parseFrom(fileSet.toByteArray(), this.registry);
  buildProtobufGraph(fileSetExtended);
  if (flattenGoogleWrapped) {
    flattenGoogleWrapped();
  }

  // The root lookup needs the fully built graph, so it must come after the steps above.
  if (messageName != null) {
    this.rootProtobufMessage = findMessage(messageName);
  } else {
    // NOTE(review): for an empty file set the index below is -1 and getFile will fail.
    DescriptorProtos.FileDescriptorProto lastFile = fileSetExtended.getFile(fileSetExtended.getFileCount() - 1);
    this.rootProtobufMessage = autodetectRootMessage(lastFile);
  }

  this.directedPaths = new AllDirectedPaths<>(this);
}
Optional.ofNullable(contextBuilder).orElse(VisitContext.builder()).graph(this).build(); + return accept(context, visitors); + } + + public > Stream accept(VisitContext context, Collection visitors) { + return Stream.concat( + visitors.stream().flatMap(visitor -> visitor.visitGraph(context)), + vertexSet().stream().flatMap(vertex -> visitors.stream().flatMap(visitor -> vertex.accept(visitor, context))) + ); + } + + protected ProtobufMessage autodetectRootMessage(DescriptorProtos.FileDescriptorProto lastFile) throws IllegalArgumentException { + return (ProtobufMessage) vertexSet().stream() + .filter(v -> // incoming edges of fields + lastFile.equals(v.fileProto()) + && v instanceof ProtobufMessage + && incomingEdgesOf(v).isEmpty() + && outgoingEdgesOf(v).stream() + .flatMap(e -> incomingEdgesOf(e.getEdgeTarget()).stream()) + .allMatch(e -> e.getEdgeSource().equals(v))) // all the incoming edges on the child vertices should be self + .findFirst().orElseThrow(() -> + new IllegalArgumentException("Cannot autodetect protobuf Message.") + ); + } + + public ProtobufMessage findMessage(String messageName) throws IllegalArgumentException { + return (ProtobufMessage) vertexSet().stream() + .filter(v -> v instanceof ProtobufMessage && messageName.equals(v.fullName())) + .findFirst().orElseThrow(() -> new IllegalArgumentException(String.format("Cannot find protobuf Message %s", messageName))); + } + + private void buildProtobufGraph(DescriptorProtos.FileDescriptorSet fileSet) { + // Attach fields to messages + Map> messageFieldMap = fileSet.getFileList().stream().flatMap(fileProto -> + fileProto.getMessageTypeList().stream().flatMap(messageProto -> { + + ProtobufMessage messageVertex = ProtobufMessage.builder() + .fileProto(fileProto) + .messageProto(messageProto) + .build(); + addVertex(messageVertex); + + // Handle nested fields + Stream nestedFields = addNestedMessage(fileProto, messageProto); + + // Add enum types + addEnum(fileProto, messageProto); + + // handle normal 
fields and oneofs + Stream fields = messageProto.getFieldList().stream().flatMap(fieldProto -> { + ProtobufField fieldVertex = ProtobufField.builder() + .protobufMessage(messageVertex) + .fieldProto(fieldProto) + .build(); + + // Add field vertex + addVertex(fieldVertex); + + if (fieldVertex.oneOfProto() != null) { + // Handle oneOf + return addOneOf(messageVertex, fieldVertex); + } else { + // Add schema to field edge + return linkMessageToField(messageVertex, fieldVertex); + } + }); + + return Stream.concat(nestedFields, fields); + }) + ).collect(Collectors.groupingBy(ProtobufField::parentMessageName)); + + attachMessagesToFields(messageFieldMap); + } + + private void attachMessagesToFields(Map> messageFieldMap) { + // Connect field to Message + List messageFieldEdges = edgeSet().stream() + .filter(FieldTypeEdge::isMessageType) + .collect(Collectors.toList()); + + messageFieldEdges.forEach(e -> { + ProtobufField source = (ProtobufField) e.getEdgeTarget(); + + List targetFields = messageFieldMap.get(source.nativeType()); + if (targetFields != null) { + targetFields.forEach(target -> + FieldTypeEdge.builder() + .edgeSource(source) + .edgeTarget(target) + .type(target.fieldPathType()) + .isMessageType(target.isMessage()) + .build().inGraph(this) + ); + } + }); + } + + private void addEnum(DescriptorProtos.FileDescriptorProto fileProto, DescriptorProtos.DescriptorProto messageProto) { + messageProto.getEnumTypeList().forEach(enumProto -> { + ProtobufEnum enumVertex = ProtobufEnum.enumBuilder() + .fileProto(fileProto) + .messageProto(messageProto) + .enumProto(enumProto) + .build(); + addVertex(enumVertex); + }); + } + + private Stream addNestedMessage(DescriptorProtos.FileDescriptorProto fileProto, DescriptorProtos.DescriptorProto messageProto) { + return messageProto.getNestedTypeList().stream().flatMap(nestedMessageProto -> { + ProtobufMessage nestedMessageVertex = ProtobufMessage.builder() + .fileProto(fileProto) + .parentMessageProto(messageProto) + 
.messageProto(nestedMessageProto) + .build(); + addVertex(nestedMessageVertex); + + return nestedMessageProto.getFieldList().stream().map(nestedFieldProto -> { + ProtobufField field = ProtobufField.builder() + .protobufMessage(nestedMessageVertex) + .fieldProto(nestedFieldProto) + .build(); + + // Add field vertex + addVertex(field); + + // Add schema to field edge + if (!field.isMessage()) { + FieldTypeEdge.builder() + .edgeSource(nestedMessageVertex) + .edgeTarget(field) + .type(field.fieldPathType()) + .build().inGraph(this); + } + + return field; + }); + }); + } + + private Stream addOneOf(ProtobufMessage messageVertex, ProtobufField fieldVertex) { + // Handle oneOf + ProtobufField oneOfVertex = ProtobufOneOfField.oneOfBuilder() + .protobufMessage(messageVertex) + .fieldProto(fieldVertex.getFieldProto()) + .build(); + addVertex(oneOfVertex); + + FieldTypeEdge.builder() + .edgeSource(messageVertex) + .edgeTarget(oneOfVertex) + .type(oneOfVertex.fieldPathType()) + .build().inGraph(this); + + // Add oneOf field to field edge + FieldTypeEdge.builder() + .edgeSource(oneOfVertex) + .edgeTarget(fieldVertex) + .type(fieldVertex.fieldPathType()) + .isMessageType(fieldVertex.isMessage()) + .build().inGraph(this); + + return Stream.of(oneOfVertex); + } + + private Stream linkMessageToField(ProtobufMessage messageVertex, ProtobufField fieldVertex) { + FieldTypeEdge.builder() + .edgeSource(messageVertex) + .edgeTarget(fieldVertex) + .type(fieldVertex.fieldPathType()) + .isMessageType(fieldVertex.isMessage()) + .build().inGraph(this); + + return Stream.of(fieldVertex); + } + + private void flattenGoogleWrapped() { + HashSet removeVertices = new HashSet<>(); + HashSet removeEdges = new HashSet<>(); + HashSet addVertices = new HashSet<>(); + HashSet addEdges = new HashSet<>(); + + Set googleWrapped = vertexSet().stream() + .filter(v -> v instanceof ProtobufMessage + && "google/protobuf/wrappers.proto".equals(v.fileProto().getName())) + .collect(Collectors.toSet()); + 
removeVertices.addAll(googleWrapped); + + Set wrappedPrimitiveFields = googleWrapped.stream() + .flatMap(wrapped -> outgoingEdgesOf(wrapped).stream()) + .map(FieldTypeEdge::getEdgeTarget) + .map(ProtobufField.class::cast) + .collect(Collectors.toSet()); + removeVertices.addAll(wrappedPrimitiveFields); + + wrappedPrimitiveFields.forEach(primitiveField -> { + // remove incoming old edges to primitive + removeEdges.addAll(incomingEdgesOf(primitiveField)); + + Set originatingFields = incomingEdgesOf(primitiveField).stream() + .map(FieldTypeEdge::getEdgeSource) + .filter(edgeSource -> !googleWrapped.contains(edgeSource)) + .map(ProtobufField.class::cast) + .collect(Collectors.toSet()); + removeVertices.addAll(originatingFields); + + originatingFields.forEach(originatingField -> { + // Replacement Field + ProtobufElement fieldVertex = originatingField.toBuilder() + .fieldPathType(primitiveField.fieldPathType()) + .schemaFieldDataType(primitiveField.schemaFieldDataType()) + .isMessageType(false) + .build(); + addVertices.add(fieldVertex); + + // link source field parent directly to primitive + Set incomingEdges = incomingEdgesOf(originatingField); + removeEdges.addAll(incomingEdgesOf(originatingField)); + addEdges.addAll(incomingEdges.stream().map(oldEdge -> + // Replace old edge with new edge to primitive + FieldTypeEdge.builder() + .edgeSource(oldEdge.getEdgeSource()) + .edgeTarget(fieldVertex) + .type(primitiveField.fieldPathType()) + .isMessageType(false) // known primitive + .build()).collect(Collectors.toSet())); + }); + + // remove old fields + removeVertices.addAll(originatingFields); + }); + + // Remove edges + removeAllEdges(removeEdges); + // Remove vertices + removeAllVertices(removeVertices); + // Add vertices + addVertices.forEach(this::addVertex); + // Add edges + addEdges.forEach(e -> e.inGraph(this)); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; 
+ } + if (!super.equals(o)) { + return false; + } + + ProtobufGraph that = (ProtobufGraph) o; + + return rootProtobufMessage.equals(that.rootProtobufMessage); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + rootProtobufMessage.hashCode(); + return result; + } + + public String getHash() { + return String.valueOf(super.hashCode()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufMessage.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufMessage.java new file mode 100644 index 00000000000000..6b46b11231623d --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufMessage.java @@ -0,0 +1,122 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.linkedin.schema.MapType; +import com.linkedin.schema.RecordType; +import lombok.AllArgsConstructor; +import lombok.Builder; +import java.util.Arrays; +import java.util.stream.Stream; +import com.linkedin.schema.SchemaFieldDataType; + +import datahub.protobuf.ProtobufUtils; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + + + +@Builder +@AllArgsConstructor +public class ProtobufMessage implements ProtobufElement { + private final DescriptorProto messageProto; + private final DescriptorProto parentMessageProto; + private final FileDescriptorProto fileProto; + + @Override + public String name() { + return messageProto.getName(); + } + + @Override + public String fullName() { + if (parentMessageProto != null) { + return String.join(".", fileProto.getPackage(), parentMessageProto.getName(), name()); + } + return String.join(".", fileProto.getPackage(), name()); + } + + @Override + public String nativeType() { + return fullName(); + } + + @Override + 
public String fieldPathType() { + return String.format("[type=%s]", nativeType().replace(".", "_")); + } + + @Override + public FileDescriptorProto fileProto() { + return fileProto; + } + + @Override + public DescriptorProto messageProto() { + return messageProto; + } + + public SchemaFieldDataType schemaFieldDataType() { + if (parentMessageProto != null && messageProto.getName().equals("MapFieldEntry")) { + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new MapType())); + } + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())); + } + + public int majorVersion() { + return Integer.parseInt(Arrays.stream(fileProto.getName().split("/")) + .filter(p -> p.matches("^v[0-9]+$")) + .findFirst() + .map(p -> p.replace("v", "")) + .orElse("1")); + } + + @Override + public String comment() { + return messageLocations() + .map(ProtobufUtils::collapseLocationComments) + .findFirst().orElse(""); + } + + @Override + public Stream accept(ProtobufModelVisitor visitor, VisitContext context) { + return visitor.visitMessage(this, context); + } + + @Override + public String toString() { + return String.format("ProtobufMessage[%s]", fullName()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + ProtobufMessage that = (ProtobufMessage) o; + + if (!fullName().equals(that.fullName())) { + return false; + } + if (!messageProto.equals(that.messageProto)) { + return false; + } + if (parentMessageProto != null ? !parentMessageProto.equals(that.parentMessageProto) : that.parentMessageProto != null) { + return false; + } + return fileProto.equals(that.fileProto); + } + + @Override + public int hashCode() { + int result = messageProto.hashCode(); + result = 31 * result + (parentMessageProto != null ? 
parentMessageProto.hashCode() : 0); + result = 31 * result + fileProto.hashCode(); + result = 31 * result + fullName().hashCode(); + return result; + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufOneOfField.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufOneOfField.java new file mode 100644 index 00000000000000..1b4db1c0a8c8b9 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufOneOfField.java @@ -0,0 +1,63 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FieldDescriptorProto; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.UnionType; +import datahub.protobuf.ProtobufUtils; +import lombok.Builder; +import lombok.Getter; + +import java.util.stream.Collectors; + + +@Getter +public class ProtobufOneOfField extends ProtobufField { + + @Builder(builderMethodName = "oneOfBuilder") + public ProtobufOneOfField(ProtobufMessage protobufMessage, + FieldDescriptorProto fieldProto) { + super(protobufMessage, fieldProto, null, null, null, null); + } + + @Override + public String name() { + return oneOfProto().getName(); + } + + @Override + public String fieldPathType() { + return "[type=union]"; + } + + @Override + public String nativeType() { + return "oneof"; + } + + @Override + public boolean isMessage() { + return false; + } + + @Override + public SchemaFieldDataType schemaFieldDataType() throws IllegalStateException { + return new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType())); + } + + @Override + public String comment() { + return messageLocations() + .filter(loc -> loc.getPathCount() > 3 + && loc.getPath(2) == DescriptorProto.ONEOF_DECL_FIELD_NUMBER + && oneOfProto() == messageProto().getOneofDecl(loc.getPath(3))) + 
.map(ProtobufUtils::collapseLocationComments) + .collect(Collectors.joining("\n")) + .trim(); + } + + @Override + public String toString() { + return String.format("ProtobufOneOf[%s]", fullName()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufExtensionUtil.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufExtensionUtil.java new file mode 100644 index 00000000000000..943363e736a625 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufExtensionUtil.java @@ -0,0 +1,109 @@ +package datahub.protobuf.visitors; + +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.google.protobuf.ExtensionRegistry; +import com.google.protobuf.InvalidProtocolBufferException; +import com.linkedin.common.GlossaryTermAssociation; +import com.linkedin.common.urn.GlossaryTermUrn; +import com.linkedin.tag.TagProperties; + +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ProtobufExtensionUtil { + + private ProtobufExtensionUtil() { } + + public static DescriptorProtos.FieldDescriptorProto extendProto(DescriptorProtos.FieldDescriptorProto proto, ExtensionRegistry registry) { + try { + return DescriptorProtos.FieldDescriptorProto.parseFrom(proto.toByteArray(), registry); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + } + + public enum DataHubMetadataType { + PROPERTY, TAG, TERM; + + public static final String PROTOBUF_TYPE = "DataHubMetadataType"; + } + + public static Map filterByDataHubType(Map options, + ExtensionRegistry registry, DataHubMetadataType filterType) { + return options.entrySet().stream() + .filter(entry -> { + DescriptorProtos.FieldDescriptorProto extendedProtoOptions = 
extendProto(entry.getKey().toProto(), registry); + Optional dataHubMetadataType = extendedProtoOptions.getOptions().getAllFields().entrySet().stream() + .filter(extEntry -> extEntry.getKey().getJavaType() == Descriptors.FieldDescriptor.JavaType.ENUM) + .flatMap(extEntry -> { + if (extEntry.getKey().isRepeated()) { + return ((Collection) extEntry.getValue()).stream(); + } else { + return Stream.of((Descriptors.EnumValueDescriptor) extEntry.getValue()); + } + }) + .filter(enumDesc -> enumDesc.getType().getFullName().endsWith("." + DataHubMetadataType.PROTOBUF_TYPE)) + .map(enumDesc -> DataHubMetadataType.valueOf(enumDesc.getName())) + .filter(dhmt -> dhmt.equals(filterType)) + .findFirst(); + + return filterType.equals(dataHubMetadataType.orElse(DataHubMetadataType.PROPERTY)); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + public static Stream extractTagPropertiesFromOptions(Map options, ExtensionRegistry registry) { + return filterByDataHubType(options, registry, DataHubMetadataType.TAG).entrySet().stream() + .filter(e -> e.getKey().isExtension()) + .map(entry -> { + switch (entry.getKey().getJavaType()) { + case STRING: + return new TagProperties() + .setName(String.format("%s.%s", entry.getKey().getName(), entry.getValue().toString())) + .setDescription(entry.getKey().getFullName()); + case BOOLEAN: + if ((boolean) entry.getValue()) { + return new TagProperties() + .setName(entry.getKey().getName()) + .setDescription(String.format("%s is true.", entry.getKey().getFullName())); + } + return null; + case ENUM: + Descriptors.EnumValueDescriptor desc = (Descriptors.EnumValueDescriptor) entry.getValue(); + String name = String.format("%s.%s", desc.getType().getName(), desc.getName()); + String others = entry.getKey().getEnumType().getValues().stream() + .map(Descriptors.EnumValueDescriptor::getName).collect(Collectors.joining(", ")); + return new TagProperties() + .setName(name) + .setDescription(String.format("Enum %s of {%s}", 
name, others)); + default: + return null; + } + }).filter(Objects::nonNull); + } + + public static Stream extractTermAssociationsFromOptions(Map options, + ExtensionRegistry registry) { + return filterByDataHubType(options, registry, DataHubMetadataType.TERM).entrySet().stream() + .filter(e -> e.getKey().isExtension()) + .map(entry -> { + switch (entry.getKey().getJavaType()) { + case STRING: + return new GlossaryTermAssociation() + .setUrn(new GlossaryTermUrn(entry.getValue().toString())); + case ENUM: + Descriptors.EnumValueDescriptor desc = (Descriptors.EnumValueDescriptor) entry.getValue(); + String name = String.format("%s.%s", desc.getType().getName(), desc.getName()); + return new GlossaryTermAssociation() + .setUrn(new GlossaryTermUrn(name)); + default: + return null; + } + }).filter(Objects::nonNull); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufModelVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufModelVisitor.java new file mode 100644 index 00000000000000..b5c630302d9467 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/ProtobufModelVisitor.java @@ -0,0 +1,24 @@ +package datahub.protobuf.visitors; + + +import datahub.protobuf.model.ProtobufElement; +import datahub.protobuf.model.ProtobufField; +import datahub.protobuf.model.ProtobufMessage; + +import java.util.stream.Stream; + +public interface ProtobufModelVisitor { + default Stream visitField(ProtobufField field, VisitContext context) { + return visitElement(field, context); + } + default Stream visitMessage(ProtobufMessage message, VisitContext context) { + return visitElement(message, context); + } + + default Stream visitElement(ProtobufElement element, VisitContext context) { + return Stream.of(); + } + default Stream visitGraph(VisitContext context) { + return Stream.of(); + } +} diff --git 
a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/VisitContext.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/VisitContext.java new file mode 100644 index 00000000000000..984215fa9e389d --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/VisitContext.java @@ -0,0 +1,58 @@ +package datahub.protobuf.visitors; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.DatasetUrn; +import datahub.protobuf.model.FieldTypeEdge; +import datahub.protobuf.model.ProtobufElement; +import datahub.protobuf.model.ProtobufField; +import datahub.protobuf.model.ProtobufGraph; +import datahub.protobuf.model.ProtobufMessage; +import lombok.Builder; +import lombok.Getter; +import org.jgrapht.GraphPath; + +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +@Builder +@Getter +public class VisitContext { + public static final String FIELD_PATH_VERSION = "[version=2.0]"; + + private final DatasetUrn datasetUrn; + private final ProtobufGraph graph; + private final AuditStamp auditStamp; + + public ProtobufMessage root() { + return graph.root(); + } + + public Optional> getFirstFieldPath(ProtobufField field) { + return graph.getAllPaths(root(), field).stream().findFirst(); + } + + public String getFieldPath(GraphPath path) { + String fieldPathString = path.getEdgeList().stream() + .flatMap(e -> Stream.of(e.getType(), e.getEdgeTarget().name())) + .collect(Collectors.joining(".")); + return String.join(".", FIELD_PATH_VERSION, root().fieldPathType(), fieldPathString); + } + + // This is because order matters for the frontend. 
Both for matching the protobuf field order + // and also the nested struct's fieldPaths + public Double calculateSortOrder(GraphPath path, ProtobufField field) { + List weights = path.getEdgeList().stream() + .map(FieldTypeEdge::getEdgeTarget) + .filter(f -> f instanceof ProtobufField) + .map(f -> ((ProtobufField) f).sortWeight()) + .collect(Collectors.toList()); + + return IntStream.range(0, weights.size()) + .mapToDouble(i -> weights.get(i) * (1.0 / (i + 1))) + .reduce(Double::sum) + .orElse(0); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DatasetVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DatasetVisitor.java new file mode 100644 index 00000000000000..4ae92a6da5d908 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DatasetVisitor.java @@ -0,0 +1,79 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.GlossaryTermAssociation; +import com.linkedin.common.GlossaryTermAssociationArray; +import com.linkedin.common.GlossaryTerms; +import com.linkedin.common.InstitutionalMemory; +import com.linkedin.common.InstitutionalMemoryMetadata; +import com.linkedin.common.InstitutionalMemoryMetadataArray; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.events.metadata.ChangeType; +import datahub.protobuf.model.ProtobufGraph; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import datahub.event.MetadataChangeProposalWrapper; +import lombok.AllArgsConstructor; +import lombok.Builder; + +import java.util.LinkedHashMap; +import 
java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +@Builder +@AllArgsConstructor +public class DatasetVisitor implements ProtobufModelVisitor> { + @Builder.Default + private final List> institutionalMemoryMetadataVisitors = List.of(); + @Builder.Default + private final List> datasetPropertyVisitors = List.of(); + @Builder.Default + private final List> tagAssociationVisitors = List.of(); + @Builder.Default + private final List> termAssociationVisitors = List.of(); + @Builder.Default + private final String protocBase64 = ""; + @Builder.Default + private final ProtobufModelVisitor descriptionVisitor = new DescriptionVisitor(); + + @Override + public Stream> visitGraph(VisitContext context) { + final String datasetUrn = context.getDatasetUrn().toString(); + final ProtobufGraph g = context.getGraph(); + + return Stream.of( + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn, ChangeType.UPSERT, new DatasetProperties() + .setDescription(g.accept(context, List.of(descriptionVisitor)).collect(Collectors.joining("\n"))) + .setCustomProperties(new StringMap( + Stream.concat( + Stream.of(Map.entry("protoc", protocBase64)), + g.accept(context, datasetPropertyVisitors).flatMap(props -> props.getCustomProperties().entrySet().stream())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)) + )), "datasetProperties"), + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn, ChangeType.UPSERT, new InstitutionalMemory().setElements( + new InstitutionalMemoryMetadataArray( + g.accept(context, institutionalMemoryMetadataVisitors) + .map(inst -> inst.setCreateStamp(context.getAuditStamp())) + .collect(Collectors.toMap(InstitutionalMemoryMetadata::getUrl, Function.identity(), + (a1, a2) -> a1, LinkedHashMap::new)) + .values() + )), "institutionalMemory"), + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn, 
ChangeType.UPSERT, + new GlobalTags().setTags(new TagAssociationArray( + g.accept(context, tagAssociationVisitors).collect(Collectors.toList()) + )), "globalTags"), + new MetadataChangeProposalWrapper<>(DatasetUrn.ENTITY_TYPE, datasetUrn, ChangeType.UPSERT, + new GlossaryTerms().setTerms(new GlossaryTermAssociationArray( + g.accept(context, termAssociationVisitors).collect(Collectors.toList()) + )).setAuditStamp(context.getAuditStamp()), "glossaryTerms") + ); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DescriptionVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DescriptionVisitor.java new file mode 100644 index 00000000000000..802c7e0c05408e --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/DescriptionVisitor.java @@ -0,0 +1,14 @@ +package datahub.protobuf.visitors.dataset; + +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + +import java.util.stream.Stream; + +public class DescriptionVisitor implements ProtobufModelVisitor { + + @Override + public Stream visitGraph(VisitContext context) { + return Stream.of(context.root().comment()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitor.java new file mode 100644 index 00000000000000..b6f52fe01c1096 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitor.java @@ -0,0 +1,140 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.InstitutionalMemoryMetadata; +import com.linkedin.common.url.Url; +import datahub.protobuf.model.ProtobufField; +import 
datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + +import javax.annotation.Nullable; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.Consumer; +import java.util.regex.MatchResult; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class InstitutionalMemoryVisitor implements ProtobufModelVisitor { + public static final String TEAM_DESC = "Github Team"; + public static final String SLACK_CHAN_DESC = "Slack Channel"; + + private static final Pattern SLACK_CHANNEL_REGEX = Pattern.compile("(?si).*#([a-z0-9-]+).*"); + private static final Pattern LINK_REGEX = Pattern.compile("(?s)(\\b(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])"); + private final String githubOrganization; + private final Pattern githubTeamRegex; + private final String slackTeamId; + + public InstitutionalMemoryVisitor(@Nullable String slackTeamId, @Nullable String githubOrganization) { + this.slackTeamId = slackTeamId; + this.githubOrganization = githubOrganization; + if (githubOrganization != null) { + this.githubTeamRegex = Pattern.compile(String.format("(?si).*@%s/([a-z-]+).*", githubOrganization)); + } else { + this.githubTeamRegex = null; + } + } + + // https://slack.com/app_redirect?channel=fdn-analytics-data-catalog&team=T024F4EL1 + private Optional slackLink(String text) { + return Optional.ofNullable(slackTeamId).map(teamId -> { + Matcher m = SLACK_CHANNEL_REGEX.matcher(text); + if (m.matches()) { + return new Url(String.format("https://slack.com/app_redirect?channel=%s&team=%s", m.group(1), slackTeamId)); + } else { + return null; + } + }); + } + + private Optional teamLink(String text) { + return Optional.ofNullable(githubTeamRegex).map(regex -> { + Matcher m = regex.matcher(text); + if (m.matches()) 
{ + return new Url(String.format("https://github.com/orgs/%s/teams/%s", githubOrganization, m.group(1))); + } else { + return null; + } + }); + } + + @Override + public Stream visitGraph(VisitContext context) { + List institutionalMemoryMetadata = new LinkedList<>(); + + teamLink(context.root().comment()).ifPresent(url -> + institutionalMemoryMetadata.add(new InstitutionalMemoryMetadata() + .setCreateStamp(context.getAuditStamp()) + .setDescription(TEAM_DESC) + .setUrl(url))); + + + slackLink(context.root().comment()).ifPresent(url -> + institutionalMemoryMetadata.add(new InstitutionalMemoryMetadata() + .setCreateStamp(context.getAuditStamp()) + .setDescription(SLACK_CHAN_DESC) + .setUrl(url))); + + final int[] cnt = {0}; + MatcherStream.findMatches(LINK_REGEX, context.root().comment()).forEach(match -> { + cnt[0] += 1; + institutionalMemoryMetadata.add(new InstitutionalMemoryMetadata() + .setCreateStamp(context.getAuditStamp()) + .setDescription(String.format("%s Reference %d", context.root().name(), cnt[0])) + .setUrl(new Url(match.group(1)))); + }); + + return institutionalMemoryMetadata.stream(); + } + + @Override + public Stream visitField(ProtobufField field, VisitContext context) { + List institutionalMemoryMetadata = new LinkedList<>(); + + if (field.messageProto().equals(context.getGraph().root().messageProto())) { + final int[] cnt = {0}; + MatcherStream.findMatches(LINK_REGEX, field.comment()).forEach(match -> { + cnt[0] += 1; + institutionalMemoryMetadata.add(new InstitutionalMemoryMetadata() + .setCreateStamp(context.getAuditStamp()) + .setDescription(String.format("%s.%s Reference %d", + field.getProtobufMessage().name(), + field.getFieldProto().getName(), + cnt[0])) + .setUrl(new Url(match.group(1)))); + }); + } + + return institutionalMemoryMetadata.stream(); + } + + private static class MatcherStream { + private MatcherStream() { } + + public static Stream find(Pattern pattern, CharSequence input) { + return findMatches(pattern, 
input).map(MatchResult::group); + } + + public static Stream findMatches( + Pattern pattern, CharSequence input) { + Matcher matcher = pattern.matcher(input); + + Spliterator spliterator = new Spliterators.AbstractSpliterator( + Long.MAX_VALUE, Spliterator.ORDERED | Spliterator.NONNULL) { + @Override + public boolean tryAdvance(Consumer action) { + if (!matcher.find()) { + return false; + } + action.accept(matcher.toMatchResult()); + return true; + } }; + + return StreamSupport.stream(spliterator, false); + } + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitor.java new file mode 100644 index 00000000000000..57ec38611d47f2 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitor.java @@ -0,0 +1,31 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; + + +public class KafkaTopicPropertyVisitor implements ProtobufModelVisitor { + + @Override + public Stream visitGraph(VisitContext context) { + return getKafkaTopic(context.root().comment()).stream().map(kafkaTopic -> + new DatasetProperties() + .setCustomProperties(new StringMap(Map.of("kafka_topic", kafkaTopic))) + ); + } + + private static final Pattern TOPIC_NAME_REGEX = Pattern.compile("(?si).*kafka.+topic.+[`]([a-z._-]+)[`].*"); + + private static Optional getKafkaTopic(String text) { + Matcher m = TOPIC_NAME_REGEX.matcher(text); + return m.matches() ? 
Optional.of(m.group(1)) : Optional.empty(); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitor.java new file mode 100644 index 00000000000000..325b7e0c3908a2 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitor.java @@ -0,0 +1,43 @@ +package datahub.protobuf.visitors.dataset; + +import com.google.protobuf.ByteString; +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.ProtobufExtensionUtil; +import datahub.protobuf.visitors.VisitContext; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +public class ProtobufExtensionPropertyVisitor implements ProtobufModelVisitor { + + @Override + public Stream visitGraph(VisitContext context) { + Map properties = ProtobufExtensionUtil.filterByDataHubType(context.root().messageProto() + .getOptions().getAllFields(), context.getGraph().getRegistry(), ProtobufExtensionUtil.DataHubMetadataType.PROPERTY) + .entrySet().stream().flatMap(fd -> { + if (fd.getKey().getJavaType() != Descriptors.FieldDescriptor.JavaType.MESSAGE) { + return Stream.of(Map.entry(fd.getKey().getName(), fd.getValue().toString())); + } else { + Descriptors.FieldDescriptor field = fd.getKey(); + DescriptorProtos.DescriptorProto value = (DescriptorProtos.DescriptorProto) fd.getValue(); + return getProperties(field, value); + } + }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + return Stream.of(new DatasetProperties().setCustomProperties(new StringMap(properties))); + } 
+ + private static Stream> getProperties(Descriptors.FieldDescriptor field, DescriptorProtos.DescriptorProto value) { + return value.getUnknownFields().asMap().entrySet().stream().map(unknown -> { + Descriptors.FieldDescriptor fieldDesc = field.getMessageType().findFieldByNumber(unknown.getKey()); + String fieldValue = unknown.getValue().getLengthDelimitedList().stream().map(ByteString::toStringUtf8).collect(Collectors.joining("")); + return Map.entry(String.join(".", field.getFullName(), fieldDesc.getName()), fieldValue); + }); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTagAssocVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTagAssocVisitor.java new file mode 100644 index 00000000000000..a19fc48099a7d6 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTagAssocVisitor.java @@ -0,0 +1,20 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.TagAssociation; +import com.linkedin.common.urn.TagUrn; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.ProtobufExtensionUtil; +import datahub.protobuf.visitors.VisitContext; + +import java.util.stream.Stream; + + +public class ProtobufExtensionTagAssocVisitor implements ProtobufModelVisitor { + + @Override + public Stream visitGraph(VisitContext context) { + return ProtobufExtensionUtil.extractTagPropertiesFromOptions(context.root().messageProto().getOptions() + .getAllFields(), context.getGraph().getRegistry()) + .map(tag -> new TagAssociation().setTag(new TagUrn(tag.getName()))); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitor.java 
b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitor.java new file mode 100644 index 00000000000000..00227954f870d7 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitor.java @@ -0,0 +1,17 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.GlossaryTermAssociation; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.ProtobufExtensionUtil; +import datahub.protobuf.visitors.VisitContext; + +import java.util.stream.Stream; + +public class ProtobufExtensionTermAssocVisitor implements ProtobufModelVisitor { + + @Override + public Stream visitGraph(VisitContext context) { + return ProtobufExtensionUtil.extractTermAssociationsFromOptions(context.root().messageProto().getOptions().getAllFields(), + context.getGraph().getRegistry()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java new file mode 100644 index 00000000000000..97ef223097f14a --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitor.java @@ -0,0 +1,91 @@ +package datahub.protobuf.visitors.field; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.GlossaryTermAssociation; +import com.linkedin.common.GlossaryTermAssociationArray; +import com.linkedin.common.GlossaryTerms; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.urn.TagUrn; +import com.linkedin.schema.SchemaField; +import com.linkedin.tag.TagProperties; +import com.linkedin.util.Pair; +import datahub.protobuf.model.ProtobufField; +import 
datahub.protobuf.visitors.ProtobufExtensionUtil; +import datahub.protobuf.visitors.VisitContext; + +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ProtobufExtensionFieldVisitor extends SchemaFieldVisitor { + + @Override + public Stream> visitField(ProtobufField field, VisitContext context) { + boolean isPrimaryKey = field.getFieldProto().getOptions().getAllFields().keySet().stream() + .anyMatch(fieldDesc -> fieldDesc.getName().matches("(?i).*primary_?key")); + + List tags = Stream.concat( + ProtobufExtensionUtil.extractTagPropertiesFromOptions( + field.getFieldProto().getOptions().getAllFields(), + context.getGraph().getRegistry()), + promotedTags(field, context)) + .distinct().map(tag -> new TagAssociation().setTag(new TagUrn(tag.getName()))) + .sorted(Comparator.comparing(t -> t.getTag().getName())) + .collect(Collectors.toList()); + + List terms = Stream.concat( + ProtobufExtensionUtil.extractTermAssociationsFromOptions( + field.getFieldProto().getOptions().getAllFields(), context.getGraph().getRegistry()), + promotedTerms(field, context)) + .distinct() + .sorted(Comparator.comparing(a -> a.getUrn().getNameEntity())) + .collect(Collectors.toList()); + + return context.getFirstFieldPath(field).map(path -> Pair.of( + new SchemaField() + .setFieldPath(context.getFieldPath(path)) + .setNullable(!isPrimaryKey) + .setIsPartOfKey(isPrimaryKey) + .setDescription(field.comment()) + .setNativeDataType(field.nativeType()) + .setType(field.schemaFieldDataType()) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray(tags))) + .setGlossaryTerms(new GlossaryTerms() + .setTerms(new GlossaryTermAssociationArray(terms)) + .setAuditStamp(context.getAuditStamp())), + context.calculateSortOrder(path, field))).stream(); + } + + /** + * Promote tags from nested message to field. 
+ * @return tags + */ + private Stream promotedTags(ProtobufField field, VisitContext context) { + if (field.isMessage()) { + return context.getGraph().outgoingEdgesOf(field).stream().flatMap(e -> + ProtobufExtensionUtil.extractTagPropertiesFromOptions(e.getEdgeTarget().messageProto() + .getOptions().getAllFields(), context.getGraph().getRegistry()) + ).distinct(); + } else { + return Stream.of(); + } + } + + /** + * Promote terms from nested message to field. + * @return terms + */ + private Stream promotedTerms(ProtobufField field, VisitContext context) { + if (field.isMessage()) { + return context.getGraph().outgoingEdgesOf(field).stream().flatMap(e -> + ProtobufExtensionUtil.extractTermAssociationsFromOptions(e.getEdgeTarget().messageProto() + .getOptions().getAllFields(), context.getGraph().getRegistry()) + ).distinct(); + } else { + return Stream.of(); + } + } + +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/SchemaFieldVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/SchemaFieldVisitor.java new file mode 100644 index 00000000000000..9a3333ea4d6603 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/field/SchemaFieldVisitor.java @@ -0,0 +1,25 @@ +package datahub.protobuf.visitors.field; + +import com.linkedin.schema.SchemaField; +import com.linkedin.util.Pair; +import datahub.protobuf.model.ProtobufField; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; + +import java.util.stream.Stream; + +public class SchemaFieldVisitor implements ProtobufModelVisitor> { + + @Override + public Stream> visitField(ProtobufField field, VisitContext context) { + return context.getFirstFieldPath(field).map(path -> + Pair.of( + new SchemaField() + .setFieldPath(context.getFieldPath(path)) + .setNullable(true) + .setDescription(field.comment()) + 
.setNativeDataType(field.nativeType()) + .setType(field.schemaFieldDataType()), + context.calculateSortOrder(path, field))).stream(); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/tags/ProtobufExtensionTagVisitor.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/tags/ProtobufExtensionTagVisitor.java new file mode 100644 index 00000000000000..07f2931a1af7bb --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/visitors/tags/ProtobufExtensionTagVisitor.java @@ -0,0 +1,40 @@ +package datahub.protobuf.visitors.tags; + +import com.linkedin.common.urn.TagUrn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.tag.TagProperties; +import datahub.protobuf.model.ProtobufField; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.ProtobufExtensionUtil; +import datahub.protobuf.visitors.VisitContext; +import datahub.event.MetadataChangeProposalWrapper; + +import java.util.stream.Stream; + +public class ProtobufExtensionTagVisitor implements ProtobufModelVisitor> { + private static final String TAG_PROPERTIES_ASPECT = "tagProperties"; + + @Override + public Stream> visitGraph(VisitContext context) { + return ProtobufExtensionUtil.extractTagPropertiesFromOptions(context.root().messageProto().getOptions() + .getAllFields(), context.getGraph().getRegistry()) + .map(ProtobufExtensionTagVisitor::wrapTagProperty); + } + + @Override + public Stream> visitField(ProtobufField field, VisitContext context) { + return ProtobufExtensionUtil.extractTagPropertiesFromOptions(field.getFieldProto().getOptions().getAllFields(), + context.getGraph().getRegistry()) + .map(ProtobufExtensionTagVisitor::wrapTagProperty); + } + + private static MetadataChangeProposalWrapper wrapTagProperty(TagProperties tagProperty) { + return new MetadataChangeProposalWrapper<>( 
+ "tag", + new TagUrn(tagProperty.getName()).toString(), + ChangeType.UPSERT, + tagProperty, + TAG_PROPERTIES_ASPECT); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta.proto b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta.proto new file mode 100644 index 00000000000000..d3ffed747206c3 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta.proto @@ -0,0 +1,38 @@ +syntax = "proto3"; + +package meta; + +/* + This is assigned to metadata fields. It describes how the metadata field should be represented + in DataHub. This enum must be used in the `meta` package. Multiple can be used for the same + metadata annotation. This allows a single piece of information to be captured in DataHub + as a property, tag and/or term. + + Tags can be strings, enums, or booleans + Terms can be strings or enums + Properties should be strings + +*/ +enum DataHubMetadataType { + PROPERTY = 0; // Datahub Custom Property + TAG = 1; // Datahub Tag + TERM = 2; // Datahub Term +} + +/* + Example below: The following is not required for annotation processing. This is an example + of creating an annotation using an enum. 
+ */ + +enum MetaEnumExample { + UNKNOWN = 0; + ENTITY = 1; + EVENT = 2; +} + +// Assuming Glossary Term defined from bootstrap example +enum Classification { + HighlyConfidential = 0; + Confidential = 1; + Sensitive = 2; +} diff --git a/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_field_options.proto b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_field_options.proto new file mode 100644 index 00000000000000..c2a6683a623b55 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_field_options.proto @@ -0,0 +1,30 @@ +syntax = "proto3"; + +package meta.fld; + +import "google/protobuf/descriptor.proto"; + +import "protobuf/meta.proto"; + + +extend google.protobuf.FieldOptions { + // Required: Mark option field with how to export to DataHub in one or more places. + repeated meta.DataHubMetadataType type = 6000; + + /* + Examples below: The following is not required for annotation processing. + */ + + // Set true if the field is a primary key. This works for any boolean with `primary_key` in it. + bool is_primary_key = 6010; + + // Extract classification field option as a Term, either works + string classification = 6001 [(meta.fld.type) = TERM]; + meta.Classification classification_enum = 6002 [(meta.fld.type) = TERM]; + + // Expose this option as a tag on the field. 
+ string product_type = 70004 [(meta.fld.type) = TAG]; + bool product_type_bool = 70005 [(meta.fld.type) = TAG]; + meta.MetaEnumExample product_type_enum = 70006 [(meta.fld.type) = TAG]; +} + diff --git a/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_message_options.proto b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_message_options.proto new file mode 100644 index 00000000000000..d3a55bf92e6f42 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/main/resources/protobuf/meta_message_options.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +package meta.msg; + +import "google/protobuf/descriptor.proto"; + +import "protobuf/meta.proto"; +import "protobuf/meta_field_options.proto"; + + +extend google.protobuf.MessageOptions { + /* + Examples below: The following is not required for annotation processing. + */ + + // Place the classification term at the Message/Dataset level, either string or enum is supported + string classification = 4000 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + meta.Classification classification_enum = 4001 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + + // Attach these Message/Dataset options as a tag and property. 
+ string product = 5001 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string project = 5002 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string team = 5003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + + string domain = 60003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + bool bool_feature = 60005 [(meta.fld.type) = TAG]; + string alert_channel = 60007 [(meta.fld.type) = PROPERTY]; +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java new file mode 100644 index 00000000000000..6239802e318e46 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufDatasetTest.java @@ -0,0 +1,382 @@ +package datahub.protobuf; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.GlossaryTermAssociationArray; +import com.linkedin.common.GlossaryTerms; +import com.linkedin.common.InstitutionalMemory; +import com.linkedin.common.InstitutionalMemoryMetadata; +import com.linkedin.common.InstitutionalMemoryMetadataArray; +import com.linkedin.common.Status; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.url.Url; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.ArrayType; +import com.linkedin.schema.BooleanType; +import com.linkedin.schema.BytesType; +import com.linkedin.schema.NumberType; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.schema.StringType; +import com.linkedin.schema.UnionType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.FabricType; +import com.linkedin.util.Pair; +import 
datahub.protobuf.model.ProtobufField; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import org.junit.Test; + +import java.io.IOException; +import java.util.stream.Stream; + +import static datahub.protobuf.TestFixtures.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + + +public class ProtobufDatasetTest { + + @Test + public void noSchemaTest() throws IOException { + ProtobufDataset dataset = ProtobufDataset.builder() + .setDataPlatformUrn(new DataPlatformUrn("kafka")) + .setProtocIn(getTestProtoc("protobuf", "messageA")) + .setAuditStamp(TEST_AUDIT_STAMP) + .setFabricType(FabricType.DEV) + .build(); + + assertNotNull(dataset); + assertEquals(2, dataset.getAllMetadataChangeProposals().count()); + assertEquals(6, dataset.getDatasetMCPs().size()); + assertEquals(0, dataset.getVisitorMCPs().size()); + } + + @Test + public void platformSchemaTest() throws IOException { + assertEquals(getTestProtoSource("protobuf", "messageA"), + extractDocumentSchema(getTestProtobufDataset("protobuf", "messageA"))); + } + + @Test + public void messageA() throws IOException { + ProtobufDataset test = getTestProtobufDataset("protobuf", "messageA"); + + assertEquals("urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageA,TEST)", + test.getDatasetUrn().toString()); + + SchemaMetadata testMetadata = test.getSchemaMetadata(); + + assertEquals(1, testMetadata.getVersion()); + assertEquals(9, testMetadata.getFields().size()); + + + assertEquals("platform.topic", extractCustomProperty(test.getDatasetMCPs().get(0), "kafka_topic")); + + assertEquals(new InstitutionalMemory().setElements(new InstitutionalMemoryMetadataArray( + new InstitutionalMemoryMetadata() + .setDescription("Github Team") + .setCreateStamp(TEST_AUDIT_STAMP) + .setUrl(new Url("https://github.com/orgs/myOrg/teams/teama")), + new InstitutionalMemoryMetadata() + .setDescription("Slack 
Channel") + .setCreateStamp(TEST_AUDIT_STAMP) + .setUrl(new Url("https://slack.com/app_redirect?channel=test-slack&team=SLACK123")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 1") + .setUrl(new Url("https://some/link")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 2") + .setUrl(new Url("https://www.google.com/search?q=protobuf+messages")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 3") + .setUrl(new Url("https://github.com/apache/kafka")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA.map_field Reference 1") + .setUrl(new Url("https://developers.google.com/protocol-buffers/docs/proto3#maps")))).data(), + test.getDatasetMCPs().get(1).getAspect().data()); + + assertEquals(new Status().setRemoved(false).data(), test.getDatasetMCPs().get(test.getDatasetMCPs().size() - 1).getAspect().data()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageA].[type=bytes].sequence_id") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType()))) + .setNativeDataType("bytes") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("Leading single line comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageA].[type=bytes].sequence_id")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageA].[type=int].position") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setNativeDataType("uint32") + 
.setNullable(true) + .setIsPartOfKey(false) + .setDescription("Leading multiline comment\nSecond line of leading multiline comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageA].[type=int].position")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageA].[type=int].total") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setNativeDataType("uint32") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("Detached comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageA].[type=int].total")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageA].[type=array].[type=long].repeated_num") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType().setNestedType(new StringArray())))) + .setNativeDataType("uint64") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("Test repeated and trailing comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageA].[type=array].[type=long].repeated_num")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + 
.setFieldPath("[version=2.0].[type=protobuf_MessageA].[type=array].[type=string].repeated_str") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType().setNestedType(new StringArray())))) + .setNativeDataType("string") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageA].[type=array].[type=string].repeated_str")).findFirst().orElseThrow()); + + } + + @Test + public void messageB() throws IOException { + ProtobufDataset test = getTestProtobufDataset("protobuf", "messageB"); + + assertEquals("urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageB,TEST)", + test.getDatasetUrn().toString()); + + SchemaMetadata testMetadata = test.getSchemaMetadata(); + + assertEquals(1, testMetadata.getVersion()); + assertEquals(13, testMetadata.getFields().size()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageB].[type=long].id") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setNativeDataType("google.protobuf.Int64Value") + .setDescription("wrapped int64") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageB].[type=long].id")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageB].[type=boolean].hot") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))) + .setNativeDataType("google.protobuf.BoolValue") + .setDescription("Indicator") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageB].[type=boolean].hot")).findFirst().orElseThrow()); + + + assertEquals(new SchemaField() + .setNullable(true) + .setIsPartOfKey(false) + .setFieldPath("[version=2.0].[type=protobuf_MessageB].[type=string].value") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNativeDataType("string") + .setDescription("message value") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageB].[type=string].value")).findFirst().orElseThrow()); + + } + + @Test + public void messageC() throws IOException { + ProtobufDataset test = getTestProtobufDataset("protobuf", "messageC"); + + + assertEquals("urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageC,TEST)", + test.getDatasetUrn().toString()); + + SchemaMetadata testMetadata = test.getSchemaMetadata(); + + assertEquals(1, testMetadata.getVersion()); + assertEquals(4, testMetadata.getFields().size()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType()))) + .setNativeDataType("oneof") + .setDescription("one of field comment") + .setGlobalTags(new 
GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setNullable(true) + .setIsPartOfKey(false) + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=string].one_of_string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNativeDataType("string") + .setDescription("one of string comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=string].one_of_string")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setNullable(true) + .setIsPartOfKey(false) + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=int].one_of_int") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setNativeDataType("int32") + .setDescription("one of int comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=int].one_of_int")).findFirst().orElseThrow()); + } + + @Test + @SuppressWarnings("LineLength") + public void messageC2NestedOneOf() throws IOException { + ProtobufDataset test = 
getTestProtobufDataset("protobuf", "messageC2"); + + + assertEquals("urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageC1,TEST)", + test.getDatasetUrn().toString()); + + SchemaMetadata testMetadata = test.getSchemaMetadata(); + + assertEquals(1, testMetadata.getVersion()); + assertEquals(6, testMetadata.getFields().size()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))) + .setNativeDataType("protobuf.MessageC2") + .setDescription("") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType().setNestedType(new StringArray())))) + .setNativeDataType("protobuf.MessageC3") + .setDescription("") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + 
.setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=string].normal") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNativeDataType("string") + .setDescription("") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=string].normal")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field") + .setNullable(true) + .setIsPartOfKey(false) + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType()))) + .setNativeDataType("oneof") + .setDescription("one of field comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setNullable(true) + .setIsPartOfKey(false) + .setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field.[type=string].one_of_string") + .setType(new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setNativeDataType("string") + .setDescription("one of string comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field.[type=string].one_of_string")).findFirst().orElseThrow()); + + assertEquals(new SchemaField() + .setNullable(true) + .setIsPartOfKey(false) + .setFieldPath("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field.[type=int].one_of_int") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setNativeDataType("int32") + .setDescription("one of int comment") + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(test.getAuditStamp())), + testMetadata.getFields().stream().filter(f -> f.getFieldPath() + .equals("[version=2.0].[type=protobuf_MessageC1].[type=protobuf_MessageC2].messageList.[type=array].[type=protobuf_MessageC3].list.[type=union].one_of_field.[type=int].one_of_int")).findFirst().orElseThrow()); + } + + @Test + public void customFieldVisitors() throws IOException { + ProtobufDataset test = getTestProtobufDataset("protobuf", "messageA"); + + test.setFieldVisitor(new ProtobufModelVisitor>() { + @Override + public Stream> visitField(ProtobufField field, VisitContext context) { + if (field.fullName().equals("protobuf.MessageA.sequence_id")) { + return Stream.of(Pair.of( + new SchemaField() + .setDescription("my comment") + .setNativeDataType("my 
type") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType()))), + 0d)); + } else { + return Stream.of(); + } + } + }); + assertEquals(1, test.getSchemaMetadata().getFields().size()); + assertEquals(new SchemaField() + .setDescription("my comment") + .setNativeDataType("my type") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType()))), + test.getSchemaMetadata().getFields().get(0)); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java new file mode 100644 index 00000000000000..a967157b500f8a --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java @@ -0,0 +1,37 @@ +package datahub.protobuf; + +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.ExtensionRegistry; +import datahub.protobuf.model.ProtobufGraph; +import org.junit.Test; + +import java.io.IOException; + +import static datahub.protobuf.TestFixtures.getTestProtobufFileSet; +import static datahub.protobuf.TestFixtures.getTestProtoc; +import static org.junit.jupiter.api.Assertions.*; + + +public class ProtobufUtilsTest { + + @Test + public void registryTest() throws IOException, IllegalArgumentException { + byte[] protocBytes = getTestProtoc("extended_protobuf", "messageA").readAllBytes(); + DescriptorProtos.FileDescriptorSet fileSet = getTestProtobufFileSet("extended_protobuf", "messageA"); + ExtensionRegistry registry = ProtobufUtils.buildRegistry(fileSet); + DescriptorProtos.FileDescriptorSet fileSetWithRegistry = DescriptorProtos.FileDescriptorSet.parseFrom(protocBytes, registry); + + assertNotEquals(fileSet, fileSetWithRegistry); + + /* + * + * Without the ExtensionRegistry we get field numbers instead of the names. 
+ */ + ProtobufGraph graph = new ProtobufGraph(fileSet, null); + assertEquals("[meta.msg.classification_enum]: HighlyConfidential\n" + + "[meta.msg.team]: \"TeamB\"\n" + + "[meta.msg.type]: ENTITY\n" + + "[meta.msg.bool_feature]: true\n" + + "[meta.msg.alert_channel]: \"#alerts\"\n", graph.root().messageProto().getOptions().toString()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/TestFixtures.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/TestFixtures.java new file mode 100644 index 00000000000000..b485ceb87d5861 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/TestFixtures.java @@ -0,0 +1,78 @@ +package datahub.protobuf; + +import com.google.protobuf.DescriptorProtos; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.CorpuserUrn; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.RecordTemplate; +import datahub.event.MetadataChangeProposalWrapper; +import datahub.protobuf.model.ProtobufGraph; +import datahub.protobuf.visitors.VisitContext; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Objects; + +public class TestFixtures { + private TestFixtures() { } + + public static final DataPlatformUrn TEST_DATA_PLATFORM = new DataPlatformUrn("kafka"); + public static final AuditStamp TEST_AUDIT_STAMP = new AuditStamp() + .setTime(System.currentTimeMillis()) + .setActor(new CorpuserUrn("datahub")); + + public static InputStream getTestProtoc(String protoPackage, String filename) { + return Objects.requireNonNull(TestFixtures.class.getClassLoader() + .getResourceAsStream(String.format("%s/%s.protoc", protoPackage, filename))); + } + + public static String getTestProtoSource(String protoPackage, String 
filename) throws IOException { + return new String(Objects.requireNonNull(TestFixtures.class.getClassLoader() + .getResourceAsStream(String.format("%s/%s.proto", protoPackage, filename))).readAllBytes(), + StandardCharsets.UTF_8); + } + + public static ProtobufDataset getTestProtobufDataset(String protoPackage, String filename) throws IOException { + return ProtobufDataset.builder() + .setDataPlatformUrn(TEST_DATA_PLATFORM) + .setSchema(getTestProtoSource(protoPackage, filename)) + .setProtocIn(getTestProtoc(protoPackage, filename)) + .setAuditStamp(TEST_AUDIT_STAMP) + .setFabricType(FabricType.TEST) + .setGithubOrganization("myOrg") + .setSlackTeamId("SLACK123") + .build(); + } + + public static DescriptorProtos.FileDescriptorSet getTestProtobufFileSet(String protoPackage, String filename) throws IOException { + return DescriptorProtos.FileDescriptorSet + .parseFrom(getTestProtoc(protoPackage, filename).readAllBytes()); + } + + public static VisitContext.VisitContextBuilder getVisitContextBuilder(String message) { + return VisitContext.builder() + .datasetUrn(new DatasetUrn(TEST_DATA_PLATFORM, message, FabricType.TEST)) + .auditStamp(TEST_AUDIT_STAMP); + } + + public static ProtobufGraph getTestProtobufGraph(String protoPackage, String filename) throws IOException { + return new ProtobufGraph(getTestProtobufFileSet(protoPackage, filename)); + } + + public static Object extractAspect(MetadataChangeProposalWrapper mcp, String aspect) { + return mcp.getAspect().data().get(aspect); + } + + public static Object extractCustomProperty(MetadataChangeProposalWrapper mcp, String key) { + return ((DataMap) extractAspect(mcp, "customProperties")).get(key); + } + + public static String extractDocumentSchema(ProtobufDataset protobufDataset) { + return String.valueOf(((DataMap) ((DataMap) protobufDataset.getSchemaMetadata().getPlatformSchema().data()) + .get("com.linkedin.schema.KafkaSchema")).get("documentSchema")); + } +} diff --git 
a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java new file mode 100644 index 00000000000000..3696f5795e1f95 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufEnumTest.java @@ -0,0 +1,80 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.EnumDescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.linkedin.schema.EnumType; +import com.linkedin.schema.SchemaFieldDataType; +import org.junit.Test; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + + +public class ProtobufEnumTest { + + @Test + public void enumTest() { + EnumDescriptorProto expectedEnum = EnumDescriptorProto.newBuilder() + .setName("enum1") + .build(); + DescriptorProto expectedMessage = DescriptorProto.newBuilder().build(); + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addMessageType(expectedMessage) + .setPackage("protobuf") + .addEnumType(expectedEnum) + .build(); + + ProtobufEnum test = ProtobufEnum.enumBuilder() + .enumProto(expectedEnum) + .messageProto(expectedMessage) + .fileProto(expectedFile) + .build(); + + assertEquals("enum1", test.name()); + assertEquals("protobuf.enum1", test.fullName()); + assertEquals("[type=enum]", test.fieldPathType()); + assertEquals("enum", test.nativeType()); + assertEquals(expectedMessage, test.messageProto()); + assertEquals(expectedFile, test.fileProto()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType())), test.schemaFieldDataType()); + assertEquals("ProtobufEnum[protobuf.enum1]", test.toString()); + assertEquals("", 
test.comment()); + } + + @Test + public void enumEqualityTest() { + EnumDescriptorProto enum1 = EnumDescriptorProto.newBuilder().setName("enum1").build(); + EnumDescriptorProto enum2 = EnumDescriptorProto.newBuilder().setName("enum2").build(); + EnumDescriptorProto enum1Dup = EnumDescriptorProto.newBuilder().setName("enum1").build(); + + DescriptorProto expectedMessage = DescriptorProto.newBuilder().build(); + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addMessageType(expectedMessage) + .setPackage("protobuf") + .addAllEnumType(List.of(enum1, enum2, enum1Dup)) + .build(); + + ProtobufEnum test1 = ProtobufEnum.enumBuilder().enumProto(enum1) + .messageProto(expectedMessage) + .fileProto(expectedFile) + .build(); + ProtobufEnum test2 = ProtobufEnum.enumBuilder().enumProto(enum2) + .messageProto(expectedMessage) + .fileProto(expectedFile) + .build(); + ProtobufEnum test1Dup = ProtobufEnum.enumBuilder().enumProto(enum1Dup) + .messageProto(expectedMessage) + .fileProto(expectedFile) + .build(); + + assertEquals(test1, test1Dup); + assertNotEquals(test1, test2); + assertEquals(Set.of(test1, test2), Stream.of(test1, test2, test1Dup).collect(Collectors.toSet())); + } + +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java new file mode 100644 index 00000000000000..7ec0456c9c7403 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufFieldTest.java @@ -0,0 +1,230 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FieldDescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.ArrayType; +import com.linkedin.schema.BooleanType; +import 
com.linkedin.schema.BytesType; +import com.linkedin.schema.EnumType; +import com.linkedin.schema.FixedType; +import com.linkedin.schema.NumberType; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.StringType; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; + + +public class ProtobufFieldTest { + private static final DescriptorProto EXPECTED_MESSAGE_PROTO = DescriptorProto.newBuilder() + .setName("message1") + .build(); + private static final FileDescriptorProto EXPECTED_FILE_PROTO = FileDescriptorProto.newBuilder() + .addMessageType(EXPECTED_MESSAGE_PROTO) + .setPackage("protobuf") + .build(); + private static final ProtobufMessage EXPECTED_MESSAGE = ProtobufMessage.builder() + .messageProto(EXPECTED_MESSAGE_PROTO) + .fileProto(EXPECTED_FILE_PROTO) + .build(); + + + @Test + public void fieldTest() { + FieldDescriptorProto expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setType(FieldDescriptorProto.Type.TYPE_BYTES) + .build(); + DescriptorProto expectedMessage1 = DescriptorProto.newBuilder() + .setName("message1") + .addField(expectedField) + .build(); + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addMessageType(expectedMessage1) + .setPackage("protobuf") + .build(); + ProtobufMessage expectedMessage = ProtobufMessage.builder() + .messageProto(expectedMessage1) + .fileProto(expectedFile) + .build(); + + ProtobufField test = ProtobufField.builder() + .fieldProto(expectedField) + .protobufMessage(expectedMessage) + .build(); + + assertEquals("field1", test.name()); + assertEquals("protobuf.message1.field1", test.fullName()); + assertEquals("[type=bytes]", test.fieldPathType()); + assertEquals("protobuf.message1", test.parentMessageName()); + assertEquals(expectedMessage1, test.messageProto()); + assertEquals(expectedFile, test.fileProto()); + 
assertNull(test.oneOfProto()); + assertEquals("bytes", test.nativeType()); + assertFalse(test.isMessage()); + assertEquals(1, test.sortWeight()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())), test.schemaFieldDataType()); + assertEquals("ProtobufField[protobuf.message1.field1]", test.toString()); + } + + @Test + public void fieldPathTypeTest() { + Arrays.stream(FieldDescriptorProto.Type.values()).forEach(type -> { + final FieldDescriptorProto expectedField; + if (type == FieldDescriptorProto.Type.TYPE_MESSAGE) { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setTypeName(EXPECTED_MESSAGE.fullName()) + .setType(type) + .build(); + } else { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setType(type) + .build(); + } + + ProtobufField test = ProtobufField.builder() + .fieldProto(expectedField) + .protobufMessage(EXPECTED_MESSAGE) + .build(); + + if (type.equals(FieldDescriptorProto.Type.TYPE_MESSAGE)) { + assertEquals("[type=protobuf_message1]", test.fieldPathType()); + } else if (type.name().endsWith("64")) { + assertEquals("[type=long]", test.fieldPathType()); + } else if (type.name().endsWith("32")) { + assertEquals("[type=int]", test.fieldPathType()); + } else if (type.name().endsWith("BOOL")) { + assertEquals("[type=boolean]", test.fieldPathType()); + } else { + assertEquals(String.format("[type=%s]", type.name().split("_")[1].toLowerCase()), test.fieldPathType()); + } + }); + } + + @Test + public void fieldPathTypeArrayTest() { + Arrays.stream(FieldDescriptorProto.Type.values()).forEach(type -> { + final FieldDescriptorProto expectedField; + + if (type == FieldDescriptorProto.Type.TYPE_MESSAGE) { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setTypeName(EXPECTED_MESSAGE.fullName()) + .setType(type) + .setLabel(FieldDescriptorProto.Label.LABEL_REPEATED) + .build(); + } else { + 
expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setType(type) + .setLabel(FieldDescriptorProto.Label.LABEL_REPEATED) + .build(); + } + + ProtobufField test = ProtobufField.builder() + .fieldProto(expectedField) + .protobufMessage(EXPECTED_MESSAGE) + .build(); + + if (type.equals(FieldDescriptorProto.Type.TYPE_MESSAGE)) { + assertEquals("[type=array].[type=protobuf_message1]", test.fieldPathType()); + } else if (type.name().endsWith("64")) { + assertEquals("[type=array].[type=long]", test.fieldPathType()); + } else if (type.name().endsWith("32")) { + assertEquals("[type=array].[type=int]", test.fieldPathType()); + } else if (type.name().endsWith("BOOL")) { + assertEquals("[type=array].[type=boolean]", test.fieldPathType()); + } else { + assertEquals(String.format("[type=array].[type=%s]", type.name().split("_")[1].toLowerCase()), test.fieldPathType()); + } + }); + } + + @Test + public void schemaFieldTypeTest() { + Arrays.stream(FieldDescriptorProto.Type.values()).forEach(type -> { + final FieldDescriptorProto expectedField; + if (type == FieldDescriptorProto.Type.TYPE_MESSAGE) { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setTypeName(EXPECTED_MESSAGE.fullName()) + .setType(type) + .build(); + } else { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setType(type) + .build(); + } + + ProtobufField test = ProtobufField.builder() + .fieldProto(expectedField) + .protobufMessage(EXPECTED_MESSAGE) + .build(); + + if (Set.of("TYPE_MESSAGE", "TYPE_GROUP").contains(type.name())) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())), test.schemaFieldDataType()); + } else if (type.name().contains("FIXED")) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new FixedType())), test.schemaFieldDataType()); + } else if (type.name().endsWith("64") || 
type.name().endsWith("32") || Set.of("TYPE_DOUBLE", "TYPE_FLOAT").contains(type.name())) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), test.schemaFieldDataType()); + } else if (type.name().endsWith("BOOL")) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType())), test.schemaFieldDataType()); + } else if (type.name().endsWith("STRING")) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())), test.schemaFieldDataType()); + } else if (type.name().endsWith("ENUM")) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType())), test.schemaFieldDataType()); + } else if (type.name().endsWith("BYTES")) { + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())), test.schemaFieldDataType()); + } else { + fail(String.format("Add test case for %s", type)); + } + }); + } + + @Test + public void schemaFieldTypeArrayTest() { + Arrays.stream(FieldDescriptorProto.Type.values()).forEach(type -> { + final FieldDescriptorProto expectedField; + if (type == FieldDescriptorProto.Type.TYPE_MESSAGE) { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setTypeName(EXPECTED_MESSAGE.fullName()) + .setType(type) + .setLabel(FieldDescriptorProto.Label.LABEL_REPEATED) + .build(); + } else { + expectedField = FieldDescriptorProto.newBuilder() + .setName("field1") + .setNumber(1) + .setType(type) + .setLabel(FieldDescriptorProto.Label.LABEL_REPEATED) + .build(); + } + + ProtobufField test = ProtobufField.builder() + .fieldProto(expectedField) + .protobufMessage(EXPECTED_MESSAGE) + .build(); + + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new ArrayType() + .setNestedType(new StringArray()))), test.schemaFieldDataType()); + }); + } +} diff --git 
a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java new file mode 100644 index 00000000000000..cfefaf1e046353 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufGraphTest.java @@ -0,0 +1,71 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.FileDescriptorSet; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashSet; + +import static datahub.protobuf.TestFixtures.getTestProtobufFileSet; +import static datahub.protobuf.TestFixtures.getTestProtobufGraph; +import static org.junit.jupiter.api.Assertions.*; + +public class ProtobufGraphTest { + + @Test + public void autodetectRootMessageTest() throws IOException { + FileDescriptorSet fileset = getTestProtobufFileSet("protobuf", "messageB"); + ProtobufGraph test = getTestProtobufGraph("protobuf", "messageB"); + + assertEquals("MessageB", test.autodetectRootMessage( + fileset.getFile(2)).messageProto().getName()); + + assertEquals("MessageA", test.autodetectRootMessage( + fileset.getFile(1)).messageProto().getName()); + } + + @Test + public void autodetectRootMessageFailureTest() throws IOException { + FileDescriptorSet empty = getTestProtobufFileSet("protobuf", "messageEmpty"); + assertThrows(IllegalArgumentException.class, () -> new ProtobufGraph(empty)); + } + + @Test + public void findMessageTest() throws IOException { + FileDescriptorSet fileset = getTestProtobufFileSet("protobuf", "messageB"); + ProtobufGraph test = getTestProtobufGraph("protobuf", "messageB"); + + assertEquals("MessageA", + test.findMessage("protobuf.MessageA").messageProto().getName()); + assertEquals("MessageB", + test.findMessage("protobuf.MessageB").messageProto().getName()); + + assertThrows(IllegalArgumentException.class, () -> test.findMessage("not found")); + 
assertThrows(IllegalArgumentException.class, () -> new ProtobufGraph(fileset, "not found")); + assertEquals(test, new ProtobufGraph(fileset, "protobuf.MessageB")); + } + + @Test + public void commentTest() throws IOException { + ProtobufGraph test = getTestProtobufGraph("protobuf", "messageC"); + assertEquals("Test for one of", test.getComment()); + } + + @Test + public void equalityHashCodeTest() throws IOException { + ProtobufGraph testA = getTestProtobufGraph("protobuf", "messageA"); + ProtobufGraph testB = getTestProtobufGraph("protobuf", "messageB"); + FileDescriptorSet filesetB = getTestProtobufFileSet("protobuf", "messageB"); + + assertEquals(testB, new ProtobufGraph(filesetB)); + assertNotEquals(testA, new ProtobufGraph(filesetB)); + assertEquals(testA, testA); + assertNotEquals(testA, testB); + + HashSet graphs = new HashSet<>(); + graphs.add(testA); + graphs.add(testB); + graphs.add(new ProtobufGraph(filesetB)); + assertEquals(2, graphs.size()); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java new file mode 100644 index 00000000000000..035c16552aeb5a --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufMessageTest.java @@ -0,0 +1,180 @@ +package datahub.protobuf.model; + +import com.google.protobuf.DescriptorProtos.DescriptorProto; +import com.google.protobuf.DescriptorProtos.FileDescriptorProto; +import com.linkedin.schema.MapType; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaFieldDataType; +import org.junit.Test; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + + +public class ProtobufMessageTest { + + @Test + public void messageTest() { + DescriptorProto 
expectedMessage = DescriptorProto.newBuilder() + .setName("message1") + .build(); + DescriptorProto expectedParentMessage1 = DescriptorProto.newBuilder() + .setName("messageParent1") + .addNestedType(expectedMessage) + .build(); + + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addMessageType(expectedMessage) + .setPackage("protobuf") + .build(); + + ProtobufMessage testParent = ProtobufMessage.builder() + .messageProto(expectedParentMessage1) + .fileProto(expectedFile) + .build(); + ProtobufMessage test = ProtobufMessage.builder() + .messageProto(expectedMessage) + .parentMessageProto(expectedParentMessage1) + .fileProto(expectedFile) + .build(); + + assertEquals("messageParent1", testParent.name()); + assertEquals("protobuf.messageParent1", testParent.fullName()); + assertEquals("protobuf.messageParent1", testParent.nativeType()); + assertEquals("[type=protobuf_messageParent1]", testParent.fieldPathType()); + assertEquals(expectedFile, testParent.fileProto()); + assertEquals(expectedParentMessage1, testParent.messageProto()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())), testParent.schemaFieldDataType()); + assertEquals("ProtobufMessage[protobuf.messageParent1]", testParent.toString()); + + assertEquals("message1", test.name()); + assertEquals("protobuf.messageParent1.message1", test.fullName()); + assertEquals("protobuf.messageParent1.message1", test.nativeType()); + assertEquals("[type=protobuf_messageParent1_message1]", test.fieldPathType()); + assertEquals(expectedFile, test.fileProto()); + assertEquals(expectedMessage, test.messageProto()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())), test.schemaFieldDataType()); + assertEquals("ProtobufMessage[protobuf.messageParent1.message1]", test.toString()); + } + + @Test + public void mapTest() { + DescriptorProto expectedMap = DescriptorProto.newBuilder() + .setName("MapFieldEntry") 
+ .build(); + DescriptorProto expectedParentMessage1 = DescriptorProto.newBuilder() + .setName("messageParent1") + .addNestedType(expectedMap) + .build(); + + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addMessageType(expectedMap) + .setPackage("protobuf") + .build(); + + ProtobufMessage testParent = ProtobufMessage.builder() + .messageProto(expectedParentMessage1) + .fileProto(expectedFile) + .build(); + ProtobufMessage testMap = ProtobufMessage.builder() + .messageProto(expectedMap) + .parentMessageProto(expectedParentMessage1) + .fileProto(expectedFile) + .build(); + + assertEquals("messageParent1", testParent.name()); + assertEquals("protobuf.messageParent1", testParent.fullName()); + assertEquals("protobuf.messageParent1", testParent.nativeType()); + assertEquals("[type=protobuf_messageParent1]", testParent.fieldPathType()); + assertEquals(expectedFile, testParent.fileProto()); + assertEquals(expectedParentMessage1, testParent.messageProto()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType())), testParent.schemaFieldDataType()); + assertEquals("ProtobufMessage[protobuf.messageParent1]", testParent.toString()); + + assertEquals("MapFieldEntry", testMap.name()); + assertEquals("protobuf.messageParent1.MapFieldEntry", testMap.fullName()); + assertEquals("protobuf.messageParent1.MapFieldEntry", testMap.nativeType()); + assertEquals("[type=protobuf_messageParent1_MapFieldEntry]", testMap.fieldPathType()); + assertEquals(expectedFile, testMap.fileProto()); + assertEquals(expectedMap, testMap.messageProto()); + assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new MapType())), testMap.schemaFieldDataType()); + assertEquals("ProtobufMessage[protobuf.messageParent1.MapFieldEntry]", testMap.toString()); + } + + @Test + public void messageEqualityTest() { + DescriptorProto expectedMessage1 = DescriptorProto.newBuilder() + .setName("message1") + .build(); + 
DescriptorProto expectedMessage2 = DescriptorProto.newBuilder() + .setName("message2") + .build(); + DescriptorProto expectedMessage1Dup = DescriptorProto.newBuilder() + .setName("message1") + .build(); + + FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder() + .addAllMessageType(List.of(expectedMessage1, expectedMessage2, expectedMessage1Dup)) + .setPackage("protobuf") + .build(); + + + ProtobufMessage test1 = ProtobufMessage.builder() + .messageProto(expectedMessage1) + .fileProto(expectedFile) + .build(); + ProtobufMessage test2 = ProtobufMessage.builder() + .messageProto(expectedMessage2) + .fileProto(expectedFile) + .build(); + ProtobufMessage test1Dup = ProtobufMessage.builder() + .messageProto(expectedMessage1Dup) + .fileProto(expectedFile) + .build(); + + assertEquals(test1, test1Dup); + assertNotEquals(test1, test2); + assertEquals(Set.of(test1, test2), Stream.of(test1, test2, test1Dup).collect(Collectors.toSet())); + } + + @Test + public void majorVersionTest() { + DescriptorProto expectedMessage1 = DescriptorProto.newBuilder() + .setName("message1") + .build(); + + FileDescriptorProto expectedFile1 = FileDescriptorProto.newBuilder() + .setName("zendesk/v1/platform/test.proto") + .setPackage("protobuf") + .build(); + ProtobufMessage test1 = ProtobufMessage.builder() + .messageProto(expectedMessage1) + .fileProto(expectedFile1) + .build(); + assertEquals(1, test1.majorVersion()); + + FileDescriptorProto expectedFile2 = FileDescriptorProto.newBuilder() + .setName("zendesk/v2/platform/test.proto") + .setPackage("protobuf") + .build(); + ProtobufMessage test2 = ProtobufMessage.builder() + .messageProto(expectedMessage1) + .fileProto(expectedFile2) + .build(); + assertEquals(2, test2.majorVersion()); + + FileDescriptorProto expectedFile3 = FileDescriptorProto.newBuilder() + .setName("zendesk/platform/test.proto") + .setPackage("protobuf") + .build(); + ProtobufMessage test3 = ProtobufMessage.builder() + .messageProto(expectedMessage1) + 
.fileProto(expectedFile3)
+                .build();
+        assertEquals(1, test3.majorVersion());
+    }
+}
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java
new file mode 100644
index 00000000000000..f9b168437643bc
--- /dev/null
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/model/ProtobufOneOfFieldTest.java
@@ -0,0 +1,121 @@
+package datahub.protobuf.model;
+
+import com.google.protobuf.DescriptorProtos.DescriptorProto;
+import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
+import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
+import com.google.protobuf.DescriptorProtos.OneofDescriptorProto;
+import com.linkedin.schema.SchemaFieldDataType;
+import com.linkedin.schema.UnionType;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/** Unit tests for {@link ProtobufOneOfField} built from hand-assembled descriptor protos. */
+public class ProtobufOneOfFieldTest {
+    /** Checks every accessor of a oneof built from a one-field message in package "protobuf". */
+    @Test
+    public void oneOfTest() {
+        OneofDescriptorProto expectedOneOf = OneofDescriptorProto.newBuilder()
+                .setName("oneof1")
+                .build();
+        FieldDescriptorProto expectedField = FieldDescriptorProto.newBuilder()
+                .setName("field1")
+                .setOneofIndex(0)
+                .build();
+        DescriptorProto expectedMessage = DescriptorProto.newBuilder()
+                .setName("message1")
+                .addOneofDecl(expectedOneOf)
+                .addField(expectedField)
+                .build();
+        FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder()
+                .addMessageType(expectedMessage)
+                .setPackage("protobuf")
+                .build();
+
+        ProtobufOneOfField test = ProtobufOneOfField.oneOfBuilder()
+                .fieldProto(expectedField)
+                .protobufMessage(ProtobufMessage.builder().fileProto(expectedFile).messageProto(expectedMessage).build())
+                .build();
+
+        assertEquals("oneof1", test.name());
+        assertEquals("protobuf.message1.oneof1", test.fullName());
+        assertEquals("[type=union]", test.fieldPathType());
+        assertEquals("oneof", test.nativeType());
+        assertEquals(expectedOneOf, test.oneOfProto());
+        assertEquals(expectedMessage, test.messageProto());
+        assertEquals(expectedFile, test.fileProto());
+        assertFalse(test.isMessage());
+        assertEquals(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType())), test.schemaFieldDataType());
+        assertEquals("ProtobufOneOf[protobuf.message1.oneof1]", test.toString());
+    }
+
+    @Test
+    public void oneOfEqualityTest() { // equality and set membership of oneofs across two messages
+        OneofDescriptorProto oneof1Message1 = OneofDescriptorProto.newBuilder().setName("oneof1").build();
+        OneofDescriptorProto oneof2Message1 = OneofDescriptorProto.newBuilder().setName("oneof2").build();
+        OneofDescriptorProto oneof1Message2 = OneofDescriptorProto.newBuilder().setName("oneof1").build();
+        OneofDescriptorProto oneof1Message1Dup = OneofDescriptorProto.newBuilder().setName("oneof1").build();
+        // NOTE(review): expectedField1Dup (oneof index 3, three decls) is only added to message1, never built - confirm.
+        FieldDescriptorProto expectedField1 = FieldDescriptorProto.newBuilder()
+                .setName("field1")
+                .setOneofIndex(0)
+                .build();
+        FieldDescriptorProto expectedField2 = FieldDescriptorProto.newBuilder()
+                .setName("field2")
+                .setOneofIndex(1)
+                .build();
+        FieldDescriptorProto expectedField1Dup = FieldDescriptorProto.newBuilder()
+                .setName("field3")
+                .setOneofIndex(3)
+                .build();
+        DescriptorProto expectedMessage1 = DescriptorProto.newBuilder()
+                .setName("message1")
+                .addAllOneofDecl(List.of(oneof1Message1, oneof2Message1, oneof1Message1Dup))
+                .addField(expectedField1)
+                .addField(expectedField2)
+                .addField(expectedField1Dup)
+                .build();
+
+        FieldDescriptorProto expectedField3 = FieldDescriptorProto.newBuilder()
+                .setName("field3")
+                .setOneofIndex(0)
+                .build();
+        DescriptorProto expectedMessage2 = DescriptorProto.newBuilder()
+                .setName("message2")
+                .addAllOneofDecl(List.of(oneof1Message2))
+                .addField(expectedField3)
+                .build();
+
+        FileDescriptorProto expectedFile = FileDescriptorProto.newBuilder()
+                .addAllMessageType(List.of(expectedMessage1, expectedMessage2))
+                .setPackage("protobuf")
+                .build();
+
+        ProtobufOneOfField test1 = ProtobufOneOfField.oneOfBuilder()
+                .fieldProto(expectedField1)
+                .protobufMessage(ProtobufMessage.builder().fileProto(expectedFile).messageProto(expectedMessage1).build())
+                .build();
+        ProtobufOneOfField test1Dup = ProtobufOneOfField.oneOfBuilder()
+                .fieldProto(expectedField1)
+                .protobufMessage(ProtobufMessage.builder().fileProto(expectedFile).messageProto(expectedMessage1).build())
+                .build();
+        ProtobufOneOfField test2 = ProtobufOneOfField.oneOfBuilder()
+                .fieldProto(expectedField2)
+                .protobufMessage(ProtobufMessage.builder().fileProto(expectedFile).messageProto(expectedMessage1).build())
+                .build();
+        ProtobufOneOfField test3 = ProtobufOneOfField.oneOfBuilder()
+                .fieldProto(expectedField3)
+                .protobufMessage(ProtobufMessage.builder().fileProto(expectedFile).messageProto(expectedMessage2).build())
+                .build();
+        // test1 and test1Dup are built from identical inputs, so the collected Set holds three elements.
+        assertEquals(test1, test1Dup);
+        assertNotEquals(test1, test3);
+        assertNotEquals(test1, test2);
+        assertEquals(Set.of(test1, test2, test3), Stream.of(test1, test2, test3, test1Dup).collect(Collectors.toSet()));
+    }
+}
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java
new file mode 100644
index 00000000000000..165823d8e4925f
--- /dev/null
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DatasetVisitorTest.java
@@ -0,0 +1,56 @@
+package datahub.protobuf.visitors.dataset;
+
+import com.linkedin.common.urn.DatasetUrn;
+import com.linkedin.data.template.RecordTemplate;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.stream.Collectors;
+import
java.util.stream.Stream; + +import datahub.protobuf.ProtobufDataset; +import datahub.protobuf.visitors.ProtobufModelVisitor; +import datahub.protobuf.visitors.VisitContext; +import datahub.event.MetadataChangeProposalWrapper; + +import static datahub.protobuf.TestFixtures.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class DatasetVisitorTest { + + @Test + public void protocBase64Test() throws URISyntaxException, IOException { + String expected = "23454345452345233455"; + DatasetVisitor test = DatasetVisitor.builder().protocBase64(expected).build(); + + List> changes = + test.visitGraph( + VisitContext.builder() + .auditStamp(TEST_AUDIT_STAMP) + .datasetUrn(DatasetUrn.createFromString("urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageA,TEST)")) + .graph(getTestProtobufGraph("protobuf", "messageA")).build() + ).collect(Collectors.toList()); + + assertEquals(expected, extractCustomProperty(changes.get(0), "protoc")); + } + + @Test + public void customDescriptionVisitors() throws IOException { + ProtobufDataset testDataset = getTestProtobufDataset("protobuf", "messageA"); + + DatasetVisitor test = DatasetVisitor.builder() + .descriptionVisitor(new ProtobufModelVisitor() { + @Override + public Stream visitGraph(VisitContext context) { + return Stream.of("Test Description"); + } + }) + .build(); + testDataset.setDatasetVisitor(test); + + assertEquals("Test Description", extractAspect(testDataset.getDatasetMCPs().get(0), "description")); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java new file mode 100644 index 00000000000000..4a02c1089fe2d7 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/DescriptionVisitorTest.java @@ -0,0 +1,27 @@ +package 
datahub.protobuf.visitors.dataset; + +import datahub.protobuf.model.ProtobufGraph; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static datahub.protobuf.TestFixtures.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class DescriptionVisitorTest { + + @Test + public void visitorTest() throws IOException { + ProtobufGraph graph = getTestProtobufGraph("protobuf", "messageB"); + + DescriptionVisitor test = new DescriptionVisitor(); + + assertEquals(Set.of("This contains nested types.\n\nOwned by TeamB"), + getTestProtobufGraph("protobuf", "messageB") + .accept(getVisitContextBuilder("protobuf.MessageB"), List.of(test)).collect(Collectors.toSet())); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java new file mode 100644 index 00000000000000..a313681c5a5a05 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/InstitutionalMemoryVisitorTest.java @@ -0,0 +1,68 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.InstitutionalMemoryMetadata; +import com.linkedin.common.url.Url; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static datahub.protobuf.TestFixtures.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class InstitutionalMemoryVisitorTest { + + @Test + public void messageATest() throws IOException { + InstitutionalMemoryVisitor test = new InstitutionalMemoryVisitor("SLACK123", "myOrg"); + assertEquals(Set.of(new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("Slack Channel") 
+ .setUrl(new Url("https://slack.com/app_redirect?channel=test-slack&team=SLACK123")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("Github Team") + .setUrl(new Url("https://github.com/orgs/myOrg/teams/teama")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 1") + .setUrl(new Url("https://some/link")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 2") + .setUrl(new Url("https://www.google.com/search?q=protobuf+messages")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA Reference 3") + .setUrl(new Url("https://github.com/apache/kafka")), + new InstitutionalMemoryMetadata() + .setCreateStamp(TEST_AUDIT_STAMP) + .setDescription("MessageA.map_field Reference 1") + .setUrl(new Url("https://developers.google.com/protocol-buffers/docs/proto3#maps")) + ), + + getTestProtobufGraph("protobuf", "messageA") + .accept(getVisitContextBuilder("protobuf.MessageA"), + List.of(test)).collect(Collectors.toSet())); + } + + @Test + public void messageBTest() throws IOException { + InstitutionalMemoryVisitor test = new InstitutionalMemoryVisitor("SLACK123", "myOrg"); + assertEquals(Set.of(), + getTestProtobufGraph("protobuf", "messageB") + .accept(getVisitContextBuilder("protobuf.MessageB"), + List.of(test)).collect(Collectors.toSet())); + } + + @Test + public void messageCTest() throws IOException { + InstitutionalMemoryVisitor test = new InstitutionalMemoryVisitor("SLACK123", "myOrg"); + assertEquals(Set.of(), getTestProtobufGraph("protobuf", "messageC") + .accept(getVisitContextBuilder("protobuf.MessageC"), + List.of(test)).collect(Collectors.toSet())); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java 
b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java
new file mode 100644
index 00000000000000..84e7eb19f893b5
--- /dev/null
+++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/KafkaTopicPropertyVisitorTest.java
@@ -0,0 +1,36 @@
+package datahub.protobuf.visitors.dataset;
+
+import com.linkedin.data.template.StringMap;
+import com.linkedin.dataset.DatasetProperties;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static datahub.protobuf.TestFixtures.getTestProtobufGraph;
+import static datahub.protobuf.TestFixtures.getVisitContextBuilder;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/** Tests for {@link KafkaTopicPropertyVisitor} against the messageA and messageB fixtures. */
+public class KafkaTopicPropertyVisitorTest {
+    /** messageA is expected to yield one DatasetProperties carrying kafka_topic=platform.topic. */
+    @Test
+    public void visitorTest() throws IOException {
+        KafkaTopicPropertyVisitor test = new KafkaTopicPropertyVisitor();
+        assertEquals(List.of(new DatasetProperties()
+                        .setCustomProperties(new StringMap(Map.of("kafka_topic", "platform.topic")))),
+                getTestProtobufGraph("protobuf", "messageA")
+                        .accept(getVisitContextBuilder("MessageB"), // NOTE(review): graph is messageA - confirm name
+                                List.of(test)).collect(Collectors.toList()));
+    }
+
+    @Test
+    public void visitorEmptyTest() throws IOException { // messageB is expected to yield no properties
+        KafkaTopicPropertyVisitor test = new KafkaTopicPropertyVisitor();
+        assertEquals(Set.of(), getTestProtobufGraph("protobuf", "messageB")
+                .accept(getVisitContextBuilder("MessageB"), List.of(test)).collect(Collectors.toSet()));
+    }
+}
diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitorTest.java
new file mode 100644
index 00000000000000..d00c81be47956e
--- /dev/null
+++
b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionPropertyVisitorTest.java @@ -0,0 +1,46 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static datahub.protobuf.TestFixtures.getTestProtobufGraph; +import static datahub.protobuf.TestFixtures.getVisitContextBuilder; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class ProtobufExtensionPropertyVisitorTest { + + @Test + public void extendedMessageTest() throws IOException { + ProtobufExtensionPropertyVisitor test = new ProtobufExtensionPropertyVisitor(); + + List actual = getTestProtobufGraph("extended_protobuf", "messageA") + .accept(getVisitContextBuilder("extended_protobuf.Person"), + List.of(test)).collect(Collectors.toList()); + + assertEquals(List.of( + new DatasetProperties().setCustomProperties(new StringMap(Map.of("classification_enum", "HighlyConfidential", + "bool_feature", "true", + "alert_channel", "#alerts", + "team", "TeamB", + "type", "ENTITY")))), + actual); + } + + @Test + public void extendedFieldTest() throws IOException { + ProtobufExtensionPropertyVisitor test = new ProtobufExtensionPropertyVisitor(); + List actual = getTestProtobufGraph("extended_protobuf", "messageB") + .accept(getVisitContextBuilder("extended_protobuf.Person"), + List.of(test)).collect(Collectors.toList()); + + assertEquals(List.of(new DatasetProperties() + .setCustomProperties(new StringMap(Map.of()))), actual); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitorTest.java new 
file mode 100644 index 00000000000000..ac8600a20c5e91 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/dataset/ProtobufExtensionTermAssocVisitorTest.java @@ -0,0 +1,38 @@ +package datahub.protobuf.visitors.dataset; + +import com.linkedin.common.GlossaryTermAssociation; +import com.linkedin.common.urn.GlossaryTermUrn; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static datahub.protobuf.TestFixtures.getTestProtobufGraph; +import static datahub.protobuf.TestFixtures.getVisitContextBuilder; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class ProtobufExtensionTermAssocVisitorTest { + + @Test + public void extendedMessageTest() throws IOException { + ProtobufExtensionTermAssocVisitor test = new ProtobufExtensionTermAssocVisitor(); + assertEquals(Set.of( + new GlossaryTermAssociation().setUrn(new GlossaryTermUrn("Classification.HighlyConfidential")) + ), + getTestProtobufGraph("extended_protobuf", "messageA") + .accept(getVisitContextBuilder("extended_protobuf.Person"), + List.of(test)).collect(Collectors.toSet())); + } + + @Test + public void extendedFieldTest() throws IOException { + ProtobufExtensionTermAssocVisitor test = new ProtobufExtensionTermAssocVisitor(); + assertEquals( + Set.of(), + getTestProtobufGraph("extended_protobuf", "messageB"). 
+ accept(getVisitContextBuilder("extended_protobuf.Person"), List.of(test)).collect(Collectors.toSet())); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java new file mode 100644 index 00000000000000..0945fd11d67941 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/ProtobufExtensionFieldVisitorTest.java @@ -0,0 +1,218 @@ +package datahub.protobuf.visitors.field; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.GlossaryTermAssociation; +import com.linkedin.common.GlossaryTermAssociationArray; +import com.linkedin.common.GlossaryTerms; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.urn.GlossaryTermUrn; +import com.linkedin.common.urn.TagUrn; +import com.linkedin.schema.NumberType; +import com.linkedin.schema.RecordType; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.StringType; +import com.linkedin.util.Pair; +import datahub.protobuf.ProtobufDataset; +import org.junit.Test; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static datahub.protobuf.TestFixtures.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class ProtobufExtensionFieldVisitorTest { + + @Test + public void extendedMessageTest() throws IOException, URISyntaxException { + ProtobufExtensionFieldVisitor test = new ProtobufExtensionFieldVisitor(); + List actual = getTestProtobufGraph("extended_protobuf", "messageA") + .accept(getVisitContextBuilder("extended_protobuf.Person"), List.of(test)) + 
.sorted(ProtobufDataset.COMPARE_BY_ROOT_MESSAGE_FIELD_WEIGHT.thenComparing(ProtobufDataset.COMPARE_BY_FIELD_PATH)) + .map(Pair::getFirst) + .collect(Collectors.toList()); + + List expected = Stream.of( + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=string].name") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 1), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=int].id") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("int32") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 2), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=string].email") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 3), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("extended_protobuf.Department") + .setType(new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray( + new TagAssociation().setTag(new TagUrn("MetaEnumExample.ENTITY")), + new TagAssociation().setTag(new TagUrn("team.TeamA")) + ))) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray( + new GlossaryTermAssociation().setUrn(new GlossaryTermUrn("Classification.Sensitive")) + )).setAuditStamp(TEST_AUDIT_STAMP)), + 4), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept.[type=int].id") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("int32") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 4), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept.[type=string].name") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 4) + ).map(Pair::getFirst).collect(Collectors.toList()); + + + assertEquals(expected, actual); + } + + @Test + public void extendedFieldTest() throws IOException { + ProtobufExtensionFieldVisitor test = new ProtobufExtensionFieldVisitor(); + List actual = getTestProtobufGraph("extended_protobuf", "messageB") + .accept(getVisitContextBuilder("extended_protobuf.Person"), List.of(test)) + 
.sorted(ProtobufDataset.COMPARE_BY_ROOT_MESSAGE_FIELD_WEIGHT.thenComparing(ProtobufDataset.COMPARE_BY_FIELD_PATH)) + .map(Pair::getFirst) + .collect(Collectors.toList()); + + List expected = Stream.of( + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=string].name") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("person name") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray( + new GlossaryTermAssociation().setUrn(new GlossaryTermUrn("Classification.HighlyConfidential")) + )).setAuditStamp(TEST_AUDIT_STAMP)), + 1), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=int].id") + .setNullable(false) + .setIsPartOfKey(true) + .setDescription("unique identifier for a given person") + .setNativeDataType("int32") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 2), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=string].email") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("official email address") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray( + new GlossaryTermAssociation().setUrn(new GlossaryTermUrn("Classification.HighlyConfidential")) + )).setAuditStamp(TEST_AUDIT_STAMP)), + 3), + Pair.of( + 
new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("department name of the person") + .setNativeDataType("extended_protobuf.Department") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 4), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept.[type=int].id") + .setNullable(false) + .setIsPartOfKey(true) + .setDescription("") + .setNativeDataType("int32") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 4), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=extended_protobuf_Department].dept.[type=string].name") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new GlobalTags().setTags(new TagAssociationArray())) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 4), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=extended_protobuf_Person].[type=string].test_coverage") + .setNullable(true) + .setIsPartOfKey(false) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))) + .setGlobalTags(new 
GlobalTags().setTags(new TagAssociationArray( + new TagAssociation().setTag(new TagUrn("MetaEnumExample.EVENT")), + new TagAssociation().setTag(new TagUrn("product_type.my type")), + new TagAssociation().setTag(new TagUrn("product_type_bool")) + ))) + .setGlossaryTerms(new GlossaryTerms().setTerms(new GlossaryTermAssociationArray()).setAuditStamp(TEST_AUDIT_STAMP)), + 5) + ).map(Pair::getFirst).collect(Collectors.toList()); + + assertEquals(expected, actual); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java new file mode 100644 index 00000000000000..6c855e70d7f371 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/field/SchemaFieldVisitorTest.java @@ -0,0 +1,68 @@ +package datahub.protobuf.visitors.field; + +import com.linkedin.schema.NumberType; +import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldDataType; +import com.linkedin.schema.StringType; +import com.linkedin.schema.UnionType; +import com.linkedin.util.Pair; +import datahub.protobuf.ProtobufDataset; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static datahub.protobuf.TestFixtures.getTestProtobufGraph; +import static datahub.protobuf.TestFixtures.getVisitContextBuilder; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class SchemaFieldVisitorTest { + + @Test + public void visitorTest() throws IOException { + List expected = Stream.of( + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field") + .setNullable(true) + .setDescription("one of field comment") + .setNativeDataType("oneof") + .setType(new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new UnionType()))), + 1), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=string].one_of_string") + .setNullable(true) + .setDescription("one of string comment") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))), + 1), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=union].one_of_field.[type=int].one_of_int") + .setNullable(true) + .setDescription("one of int comment") + .setNativeDataType("int32") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))), + 2), + Pair.of( + new SchemaField() + .setFieldPath("[version=2.0].[type=protobuf_MessageC].[type=string].normal") + .setNullable(true) + .setDescription("") + .setNativeDataType("string") + .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))), + 4) + ).map(Pair::getFirst).collect(Collectors.toList()); + + SchemaFieldVisitor test = new SchemaFieldVisitor(); + assertEquals(expected, getTestProtobufGraph("protobuf", "messageC") + .accept(getVisitContextBuilder("protobuf.MessageC"), List.of(test)) + .sorted(ProtobufDataset.COMPARE_BY_ROOT_MESSAGE_FIELD_WEIGHT.thenComparing(ProtobufDataset.COMPARE_BY_FIELD_PATH)) + .map(Pair::getFirst) + .collect(Collectors.toList())); + } +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/ProtobufExtensionTagVisitorTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/ProtobufExtensionTagVisitorTest.java new file mode 100644 index 00000000000000..09f0425136194e --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/visitors/tag/ProtobufExtensionTagVisitorTest.java @@ -0,0 +1,59 @@ +package 
datahub.protobuf.visitors.tag; + +import com.linkedin.tag.TagProperties; +import datahub.protobuf.visitors.tags.ProtobufExtensionTagVisitor; +import datahub.event.MetadataChangeProposalWrapper; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static datahub.protobuf.TestFixtures.getTestProtobufGraph; +import static datahub.protobuf.TestFixtures.getVisitContextBuilder; +import static org.junit.jupiter.api.Assertions.assertEquals; + + +public class ProtobufExtensionTagVisitorTest { + + @Test + public void extendedMessageTest() throws IOException { + ProtobufExtensionTagVisitor test = new ProtobufExtensionTagVisitor(); + assertEquals(Set.of( + new TagProperties() + .setName("team.TeamB") + .setDescription("meta.msg.team"), + new TagProperties() + .setName("bool_feature") + .setDescription("meta.msg.bool_feature is true."), + new TagProperties() + .setName("MetaEnumExample.ENTITY") + .setDescription("Enum MetaEnumExample.ENTITY of {UNKNOWN, ENTITY, EVENT}") + ), getTestProtobufGraph("extended_protobuf", "messageA") + .accept(getVisitContextBuilder("extended_protobuf.Person"), List.of(test)) + .map(MetadataChangeProposalWrapper::getAspect) + .collect(Collectors.toSet())); + } + + @Test + public void extendedFieldTest() throws IOException { + Set expectedTagProperties = Set.of( + new TagProperties() + .setName("product_type_bool") + .setDescription("meta.fld.product_type_bool is true."), + new TagProperties() + .setName("product_type.my type") + .setDescription("meta.fld.product_type"), + new TagProperties() + .setName("MetaEnumExample.EVENT") + .setDescription("Enum MetaEnumExample.EVENT of {UNKNOWN, ENTITY, EVENT}") + ); + + assertEquals(expectedTagProperties, + getTestProtobufGraph("extended_protobuf", "messageB") + .accept(getVisitContextBuilder("extended_protobuf.Person"), List.of(new ProtobufExtensionTagVisitor())) + .map(MetadataChangeProposalWrapper::getAspect) + 
.collect(Collectors.toSet())); + } +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.proto new file mode 100644 index 00000000000000..8fe9d6d5dcb10c --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.proto @@ -0,0 +1,28 @@ +syntax = "proto3"; +package extended_protobuf; + +import "extended_protobuf/meta/meta.proto"; +import "extended_protobuf/meta/meta_field_options.proto"; +import "extended_protobuf/meta/meta_message_options.proto"; + +message Department { + option(meta.msg.type) = ENTITY; + option(meta.msg.classification) = "Classification.Sensitive"; + option(meta.msg.team) = "TeamA"; + + int32 id = 1; + string name = 2; +} + +message Person { + option(meta.msg.type) = ENTITY; + option(meta.msg.classification_enum) = HighlyConfidential; + option(meta.msg.team) = "TeamB"; + option(meta.msg.bool_feature) = true; + option(meta.msg.alert_channel) = "#alerts"; + + string name = 1; + int32 id = 2; + string email = 3; + Department dept = 4; +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageA.protoc new file mode 100644 index 0000000000000000000000000000000000000000..45444ef366ceca3ca073803a764cec6b839dfba6 GIT binary patch literal 56167 zcmd_TdwgD3dFQKlzZXecvMj$=u&g9xN4fYCTS=Tiz$wYdmh8ybmMh6X=m(rv(yL1% z>lJ#XIA-WHElnA2J@k?g1_~*cmH=}yp;JmZ(eW~5*H z#>I)jquqsSTU?qL9y`$;?R`PtqigE5j!z zMrdBBw#Vh+S5TtaNZfUJq1J59&&*HNR^}Uv)fdFoJLYHS7S11TEY8eN*B4jjYYPDA zjyu;pDO9(_?c?>u=KRY1z4h+s*new_E7ej<%Qc0#EiQCci!JoZuc%s#TDHW$T#Tb~ zOSQDMWqYCT-xTACx%pHVtqPUX(Xb6hMdgI*B7Rn1IfrrGF@*@EzeKZo5@_` zT(UA(Uso$>&NWsSrjyBfvRq$Ut~bHBJ{`|5CZ3K1iFH+nGRfK1=89^rHtCQm0(X~s 
zy(?L&P2E$Qtq&w8Ru@+0puNn?h-fN<8TYM`JnRa;tFU3O@AOyioG!VOFH<(2dMl9k$QQd^w9UVYb> z&kn@#L~XVi@M^9s(@Aq*qLJ|rwI&;lg?eqV8Bb7seMMEUs_`k;!#p(2i08q@`YV=O z3f1D)a<XTq?IjE!ARow6%Yyk#uf#GC2$#8)p*_=zJV^#%0x`RdeBFM3$R+DTIy?g zVd207h1f3$sLt#hgdOV1OhYpT{ltPyBE0ju)p<>aW;&Z!&Bjz67G0e6Q!JKUOzLip z#Z2wga-Dev5|48YSJlRT>h!Nfo6DotP|`c99fmYfwR9!RwC|Q=u-RNaD?E=b zG@8xY^7%xZ!Z)kFf2FZpjC2?tnhrNZQxVz)P z)dTAq4Xka!0$6E`S&RO!mg4Q3yscK_(jrm=h=twlWA?k+8@GCS(;M02P^`W%j;HHO z+A0>O&i59sWfF_bp1?uxgbtJm(ySM9K>U3#3W zcsTAn3zsAR&aANA^poDJx#cqwuwu{1G;^b#Z1J`CP5#3BYO8_|{4#pje z^(LFK;|TF`$AOFURO}jUWGTm#KPXGRD&Cs%!c7YaT(kjSy8+#`kk^m9@0NJWngKl9uKV8*U$QCa>Z{_O z)x~)f$7M7~ajaYYy-zv;vXSRb=W9Cz$F{El;U4a7Z~J;{_x=1u{l8g>do~k1n=W6L zKSFV`zRVs_K)Cx-bucb3)KFv)(S_Yz2d>*dyUltSIIbpRu4VPMxMV`UY!LQ7I|y2i zW7e*EY22>%jyEa*X&-CT1Mx+*FK)ACGF`9xtt~BAVJtL@8v6B_dut1;b;Pq8clw8~ z(6j0lafe$~^X!55ne;4=xrH{OC+hz0W@9n1rgg6e*8MFRb2-6%`R2o0&r{keoi^d8 zzP8Z24eZ*+y4+*eptjKei3ppE(N?#{9TRt+8a^{Ldiv1uVa%#7^m9Epa(r|U+bS)O zoSb;^&E3Vc!D;uPl&^5pf$p+QEC*oZRl`FxsjPiMomLrLR_D;@=<)8h6z2HE*vQGF z-R&vNqhq6|Pj$yBgcHN#OAju)h4bsX5p3kGXFFZ6#Uinni~r)=8E?Qz-h zvfF0&Y+yG5GR|JV2e@{5Rl|wHjjDz~tVgo?nhnC=?T;_s6oYl04+!KM89J8A{0nRX z+BDa10=LH7HUN6o!uXAaxc5TD*_70oNYr#@BJtL2%{vNSxhV;364sNk|CbB#3pQ6N zH^t$$xPtXR*U*lp{eI)#b;4e+qxYC^mEY|huAAtEPQl%P>6@eYl1+hcisBy>XmWl-L-pb zXM$1XY^@GGJLUX3bkbv07-N0;%#@s7rrcl-^j@0p-!OzO)H$pwr>RjLk2@P!CTHhg zUq6F8!}idw0~HzFNb*8>pn_Q@$sV(!P z*?WamF0CxDVns|%4cp5>dsp2XZ);56J>@tO?=Y^H`+BdYLHpQNy)b?}Wsj?SfZd#{ zEiak!(}dgWz1O&^?zwazd#n}FJU2hH5+LrfY902>L)#s9ixRdJLbcsit&8XReo@?1 zQ>(&ty2q+@@-XZ1=J*Awgm|pUhnuUt&uWY8hzxs~wF1xO)yeZIW9@f%>RDi|=S?~9 zKNxrB`Cg83<9rkn^sK|9L&ImrM_xUQE`n-yd}Lzy%-Ct1=qHA|i#f&qSgGyW?p@Lv zy4G51)gOv?KrwTxH0vv8&f#Fxg2WGOMp7!@dVDP$P_Lv8%Z)14qqaPa zl_^KK{8VRZQ}Ri&vL)A3D3>iZ#(m{%`x*3At83X;0zYk-XJ54Iv$ZAocV+p!>E~@@ zZ9EuO`Jcs1f|{l7GXE`iSNLzGyOsa8b+_~1_U^d<%f-0UOdL%d{uvoWQ?fpcqFo2B z+OSRKYY!aO=60~+)a5bPQmz)3(mkwdqvx0}RgcD%yPF1EyHVGs-a!0F9+QD%;|9F_ z*!Y=~qhluqkB|B0)tzx^q4xUoYqvpr^lYaE2y)r2-F)oP^EWB|*T-d}mx4#$z-Up( 
z>G0^-5KDwL;hvs3H8OmdRnq^WxZ;SQ<&%smX7c-+ ztV_8~!W=(u5+K)g|9>dN9XU-~hNy*e&eRs>k>QRtVtX{GC(m~3{yC6`ZY-Dn_ZH&r zRHY8YTf6Y38Ev}o|2fd?e@`Lq@_Ya0=w10g6um!-;w@{m+I4e$LF`@^ZzEKCcBz5m zdFuR`h5EgCX!=cPY;-fO>0w|bYj<4D4sIVA8a{DqbYl4A;XBXx0Pa|K4!l)+9+&;8 zxO+WP)gAF>XtA=l$6HU1vP-g64j(x(e0XBqc|y`bD9W5I@O~n{+c$BZA;w*l#F zV;%09tAanOHaoklZUZ1^sdS}}tzzG?xGm5@uCYaEf-+@p*a#i6O%FsQX@v95Gv=JL z*hrK_xVJjyo2g5*|Cdo=)`lgpX4)1SE;WE1RwnQ64Rf=$$-r@6e=Kd;_XTnS!eh4E z(o$_@&cKxqMcsulE9*Jh<)T^E{<+H1dWmXJn;FzNdzKk)no|@7**`z*A1gXWq;h^? zOjoT9Osxn^sg}C8fVwf?xF5XnY5r^^Nzx+_xrHuY-54Y$UtLxHVeE;VQS{WuS=8P zDHPrx6_1`e^pU9VsD)_<3=%=ufwB6`LVZfA+_9hdTokuf%ez{By0s90WUJ+n>}shV z_{~lw8mu&?8Vkvx)tQ<4ax>YVI6%9ai6xA%EbGftbBfa_f{*$fCx;tL=a=Va=T?#f zH{9?N-yj)ToEk`y!G(py-ZhE*n|EP)AWr61R+gFvuLpSqv9d6lff^9TjSwXEgSU{l zK8};I`t-b(?c}P4K)8Qexg|<1aT8!qCg&FwS{GjAX`Gv1nNu!?{>2#)CubYeY5L8+ z1d}nL=d<%GaF)^!?nQW!9Wz&3u?&ZF znRJOTggRc5QriUa zGEVYyn-1z^or$BR(3Nl%VH`6$-dY59cxQBajXp&B*h%_KEY~d=zhm(D z@wEeu6LRB*$Ao=;n8~4GupA_)qps-sPJoV~+Gs%8ufq@z1ds2F6PyNzNB9LjP~E`j z&V3%}IN=QC-3=a3h6Ya%fEiDAUyLeD%;D2x!zXAbQjDKIG*05)>51Xw=;-K>5qo@i z>=h%2hsSSCj*p7`N0QUy!yqy=I5B8l0|RpLo&g*>O;ARJBQ!HSHg?*gE_;~eS3((v zK1kakqwwfSjaIxeJUVu#z={qw1N)L!-Z9K`CidWQD}#St>a%6mX4-+;5Zbt=EWPar!ox&(|ERF;(rbxeYtP7FJT}(!fB!fe* z0LW0N4F`;mcy^)D`02xUc$yEy2fi33gR98f<%7vR2!h7qzwkR4f<4*&a(k4#0yj%- z55ntEP5FVy^9Fka!h2!k281_;w8dodJk`f*i+8i4kIvQ4*3Q*d_9e&aGc(B$nRtkE zJsM*8U<4z%22 z53i{f_0v7PzU4}Lcy+a?pYGueEmzpXE2>5PbPsQCxh(!MdT*~5`PmmzNV8P1G!3-j zo!&0%4>1bGCUNj;3&-Q?6_fpwVuv`XEzH6XD|2L4PB#{Jtt96f%l9PHt8%_)S#nF1 zAj#cvR3h8;B`pUFeXq5YLqxjMVPLr>CSuv9%Y|Gv%nhDE+cMan1zr1zoB!A`05d?w zDw~GbiqyiAa_(Nzwo}P-B}>?RX|!AEaw?Rn#h1FQOqD8>I$jvw(d?yHTop=1O4sg+ zuLpVZCtud`>O$XDFmlMit$~wLi`k%Z+~_e-GT+PEE{$Ib2A14-TXbpvKF8hKC-*^8 zn3CO0%(zTeVNNL_7F4&SPBjJJ+dRIM%9J{`8HO%R@-|dqjf@ib-nO$Z-fLab|LxHW z`!7$7W&39u4Z7Dq1CzC7e%4o}K-IM@(8uj*9|iijJ?%pSxgDRKuSp-bUv_zXv-Lsd z;i2ftzFo;lMr6CYR&Q7gVjq@TSk!565xpHsdyDApP}-YP8xK`4^Swp%cIb*E{$cBl z!cerkZwSSL%m>$>SivRw`4vc~9tW4toaF^Ny{#tIE=8pXDnltMMNk>)uq;c+Sp=1# 
zZT%jVqI$n(SNwWIg=R;iYx~ZwM@3%AXNL$f3&*xsmG+y$F3Ea!ewHYil;_6c!uf$@ zsA2iIEWs4#l64-Yt_03UQ=H+`;?Zr1$GHT~N3XgjezD<9#beRFKDHUO{guYje)G$? z>{c&b^D;Q?uS|c(QW}+6FvmLjd=KOm7mr=KE0pN(*xu`G)fFNtzC7CRUsV9|@)Sr# z@BFHxB0ye#gI==#1tFSs;(d^05LOB+VCv4G5r4;$q795{B>89V9z zo3mZuqdOa`rb9PtGj;a1<@#A!FShR3$=UK)5$YyvTZGIk%XxW~wdZ5} z3Hq(tfqqjwT;Znv6s-=>x-&(qg9UPDDu_Bj>rOJ0-2gg3>&{Crk9V;ZkT3q4mLDzj zUAkr~@On%ld!G)p2}zc{rfr8Yq2+|XHoDZ{QK0A7`kpD#^J{(2E?@k$UZ_)|=hq_C zwUb%m_g#^r6$OCYl>#Y1)w?_=^NzmnO0MzU6##PA?mZp|QoH7t1G(e?sii=)Xlp4D z4WQ*daiUSz=!YK2wc-MmjkYC zbuI^7Q#7~z1u1Y%(i}6r#beGi-CepqeXWVQI|oivb@!J3>^F-+U6SYvemO=uDNG~?IFjg#N#IjS z^u-htB>GYm*g+C~DFq^lzLa7jiN4h5VMzIPWA%g~Zo z2(BwTDdf^cIW!gWy7Es=Tm~%lP(>cL(n3X=`bO{}I3ISj=I%|ohBCnJO<9sMbh$TW z$%2;bxeKr)F9Cio2V5+9E(ctgo=aJh`T#$d3Lm!7^O05^>nzeXdOn>wZKLNqoV4T} zV9xJ!S^*`1oJTA837ffyee?~{HGQku#JJr_O0oQbbMsSko6Jv`p3SC0=HDzUd(nBI z4vH8`wjh9vx+2j%0JffQ-y*F zcZszhYr8xig&jH3S1tW`Av)MM=xiIABFhPf!sy;_>M#+eSEJsPse~2Um|Y~lD8Y5c zA-nOq_w~UuTOcn0O9nA5a+dJN3y4Gi7WV(~!WB1#Vo}8xzchX|yADch>48GDv+qQ5 z*b>j&$v1b1DVcId)tsVXjGZ#O>F=gB_`Pf`UJ|yPgcI;By{_Xj%%~zcu|k?7Zz^8W z8{Y)AO4Z7n3oYL)6#A}Nvr%Tc2vb>0ms1Rhi@gt%gj4tCg)a!Hg`DJI^Vv{gA z9P-5PU9XWYEnQ_RQs02m3sARa4;a_m3k3JNe798i=|YKMpKrms(Sxq+0WE&I!0D}X zz6IriOAMbDELpDBkC39PlT7KrQiN3t@FnJ-P7)^r{t%+9WxR4es)e zEnPa9pF!4yMmdFNVg4R;z}#RE2ET|csCIjkQXD32&Bhgn=A8JifoZe-jkQYTr6e@%v#6F;PUY$^@u}Whd&ed z(r>36;$2<5XR&c^(M#|gZz5_n$ZR^>A;WtM+qTF1Jr&|=>3xOL3-DqbuP@H7%xzAG zl5IXPQo0@F1JePFj4RK(H~i*itQhFfTH;(1HgU(`=!|2MgWVy6mc!K2#{}NC($N z3m$CC9?;@Lg&t{;P8x2lmVPPUVk<3pkhR!Ki(gWUUA{$kwe-u3qko44OmmG~a*n$o z;Yedp&1`^V0rG$W22k6or4K(VP}=}S%?#8wKz-QJN#_=nKZ975{NWX&7>LG#Qzt3>OwooxZ!nB~6LelvD zePLU`2|JJ?IFY`j;Zgjg^o_R{k`Y`7*uR$SDv3mzr;nM+S74qj#0gK?(WpB~D8TtC z6c=1sh-3*4e2IWpq|kniy_~a|R!U`2uXD z;Z2NENcIV;h{E2rWY@s1eNqf*JhQs6aDKnC-j%TQ5A2ct!lLjOf(;?Lu@#-;&WhQ%}D?o!O@p8nEh4ry3DiAymEDSWwK^55yU9 zB&MJfvN@$(m*S3fECd4XWy2?3pi(G)yTHLdhdTx`#miq9KkhgN_WE2Qy1MWEj$>dF zBGyxZQBp^rW+e6It$;LWlI*VAI=uJSIpIvRW22X=I9z9e7hkFK{bKljJ41p;%WAlc 
zDHw81KmI%+muAq7-q1-i*)8>eP)2RZkw)>ZbG7qKq3cX~=Q==#)A|mr#5~ESp(eMz zEV=Q(ON<{qPykJg4vp@10meNCT^w=$i*NR7@9lxWI1)s&vkuMDcX@qt;HH#Oa5%&08br@B(YRTnz7e*a5=Up z0gG?Kg)QVaWpT<*(eYEave9v@ManX`F@#1KLMub~LMA_@AAX^*Er@MA85D_XeAf*D z zOk*x)En|~f245>nW3pF}!=_iEP^bP$zzpQ2(fo=Mjp|BMaj6(43=9-641e&0%qUm7 z3j_^Q9$_MUXX^`x2_L3}!;7H72g0>MxIfJ1UTX3mW^w{Y0>u|9x*_3=hWtk?HP0A$ z>q~;_@_N+bnI@$hHXbQ4F9{(9B zNV{-7?7`tM z99^3s35xjkLeh^tv*1T64}posuwUAIAS2h$T5$5%O*j}&blfBrf>_H`;&SpxP2oBm zIAXD}xF5lf(L}dv;nF|iSPZ}%rL&eAZ(31ipDC+rTDMhsp_g{0B+mH-36gV6SV++_ z$#9O?JF$^43AU549Y|tJ>SZ@Qp_3SMVC^kV?+<_I$+K$ii}OGAYXV#?itJ zGzx96AJh8DWt7bZv1s}3l>aw7r-6ZkcBgO&5i1FGV;vm~Q!1i)knY7~o%eB92%Tmv zXqz>o)v^nAX-(Fd)_H9S>7q@2P})#+nGu`zK6qD{7;E^M2=mG01%7lKD#kdabcO?| z0C3Qi8DS%l7&NW2({_m>46nfg zff8L)bX`-V>t}-4rbyFgf(=8FuAd1uOj*Xnw}aTG1SE<95Qmy=Lq2*_yLLV1#WjPsc{!HP5QW}Xgh`%k6ilLEIG?MRxkx-(4imrc(jO06E zBot}-oiGxL^#7eOl8Q$1-2y=sX(p6_L@_|pNWNR>xGKE0%2)3RMIe2*Kw!n;gf;fP z0>NbmNoEJd07=%^_X>oU{aYY?uRwg+7lHJ>0s-d7q<~h4hE;wtiry3z``#OE5*i5# zsdE|Tf*9_>CXV-*1;NTkyyO*VEs|X8ECgcwo<%_9?o=$AoyQgcD^_WwNWT?#pVqnr zEHa>-!`?-8tV`NzJEbYjTnrD#7MqEIWtB=`)s~RMWPBnBEe7tjY3kH90C}oNvbgC0 zX{?R21l#>Gf?&||CnJJ0m*HNZCi|s+#A(td^zc)d;4V5=kxBlx zD7srSKHG-$*yH%5&drZQ^j zE?|lnrRWXAilvCXBl3m{MKFCwgsi#|S=Cyt{Cw2%E@ahJd0FLEdT(X>t;b6(84W)l zwK-+CRZRI{9*VpVi4u?~21sJH zha&I!;jLBnJ|v1jdMNTfq*i&k9*)SX6Lly7iDG~xsr7Kw;cR@~0_oxCGAH6F0_ow1 z+`6lwPFuC|o~Y#)q0S4|P)CA*tb0!o83XT$+LTvQDz%9qzYqvQiOwmy&M7jCUkC)D zNYh^k1ffXhzYqx0Cd}R&5!ci(C;^FLfFv>g-l!u9Z>{o`yFw92?~RCUI-DTLBN4G} z2T25>7$Autk3>Yb{aYYC5)t0^MIb#A5#PQ6g0xpFAB+O@=y zee=PHaHJ4v7jZrmh(ifPC^|$aLYxl;;!vdNhXQdZg2;zNoOjAPZ5MSOjj&GtHQqhl zpkd0jPt!C#&3;mQ0k5eWV#6{UhO(fSgT)z>Xd)hQZpAu5QNsA2TvB>*7+f7J47Ig;{ zB%~gTK*1r%N~Czri$f9T%45+DFY*Wi>G91FwEbGq?sW+I=j#!adkr`NLAlo+&k&T_ z`FMt)+-r|#2$n(c@eIK-2tF90Wu|TQ`)`~J6yf-VsQHudBQ{KUl%7xWAzqG2{cRSGI60XC9 zAXn220Rh@PLjbcsbHN4*?DeDx_MM{a6LVVsxw?WWGwwXb55w_Z1ed67RwTKHW@oLn z(r6+tIjIvy>@~^;DkoA1BeRBAP6n_gSkV5gl+)CvbJ;op$;`t|xPJjdd(Fjk-rM4w 
z9^NQ4`D61b)XZTlDMwslYMF~+%{369XCz2oX&B=lhYrr#v$RWQbE)BGK_fz+4(Uaj zetFsJNWFRdH8tiSxH9l!kWz2lvD9E{ty6O~aP_lTZL(fpj1#kNX^F=Ji&Z|09CSqo zjO?1~;m~WEV_>|pHXlTD##~L!HE>zUJ?;*)H4nxdqLUNa*`|SS%|jScZKPC%h(UVg zGio`vvwENQvQENrIufpYy*;vPnuSLFJ`VZ854gW^S-5q#&j{(vbW~Hkug&B|H}EGY zD|7fH@D{vOf8q-B8o0>BdGcW)I+vbuqck-kq#k)&+|3TtkocK3(6(?=+3GQXt1mce zsAPHPO`;`6ejC0^MYnv|R8p%axuV@Qwm97p?kfm)+#dU}L>8O_iBpvOXBQfiwT1oZ z4B{J_#aLQCpF5sogXh42Gn2B}4zr4bOW}NT59_R?hHeMx?&jo+slG-?zlM$3?rJ zTx3^jy8=@Uzb@)5;?qsST;Qnp#y%Dv#Qh*Z>G1uK1}wo@Hl8yu@G6Bn$y07rG|d13 zBLiS)x#^X^e09Yxrq0Agb9HjRmxeCaNz2_|<~{0|5Z9fhMkDzIO%*sk>Ex`x^;(M~ z`9TlM3Wz&GdzBxAfSCUtJD01_l?|@6Z`%r-{2*hahOn3fCsT}52ff@&t4oF-GJr#s z9OG1O6BKs$$@?Xh@`!Vc$VK#DqPFcWH$9ft_{Au?s_$zB64Ka9G}vQIIYP!RgO{{o zLD>ySY$)0OWEW$b#PeQ3A2~ogE!2n+us#pj{-Vn+<;`a8$X}KMX2@DTPWuaf0?u_x z!g1g%Gwl&hZl7G!KJ~M4*z~Lo+oG%{jFL%^=X~fix~H@Jon44Xl+wef(MK3|T0|fD zV$lA03yr=QaSyexj6U+k2ye@`pSJg-vJdp03!1@L(K^*9rPQ7~%o03@yQn=5_<0a|Y`=2R7wfceODy z+?1eFYyY8uKr&VUgfn32d`a(z=3&01_kSguFX{bXiK@;9V}6*guSDdV|5DNB3&sDb zD7v-pokguRcP&{Mf%-RgH~qTs=b&G*5k|sO(akT7_ZSCI_nT2P%;}8@#X6ku zMKVBT=!+7qkoabF(~#w6#ZghQ%WpE!hJlZBb}{jt+7womqy@c1))w)9NGU8dH7)KCO0%%yi%29)=616H4J^|guB2E z{Ue+Qbu5E&CAN^ROs$l`3?an0rPw)D&M{Ywi`bUfW75vo_U#OcY>1D=-I{N1Q^ zdpv{}8sOTD|2`^p_1~NVrpNnRkighqQnSN{-^ z1sl4e{QpKI8{U#163ob{95~E#)^W|ErGvpv>!wuhZb8HcDLtDiSH94H1B;J6D#A7XM}M!CP(>s%ldVS)RLyMio{!{Pp7Dof;WxW9-j`LbKZ zfO=Cg>hJsCqfBEunyEETUt$H>-4&TEy|})FOlY_}Pn^1x2kNsjh*hJx=kqVZM_f!F z-7kY2CH5m-|pvk$@>hRCX9JvZJQpfCK71nOTZ8EvcGp*{-3att;g8jmlZA&h< zse3T&S&IrJZvtgKE^8Xg2QGa#-5~9w*-?t#&qtBzdsFd> zE8{mfLy??0B3ara>)n*}2Aek*QjL(OJ}aoH}edwf=Fv-R7Pq~8|zJP8k> ziOWxvYa_jCM4}S?iuH)%jl*R*89Q~DI5DmY`7D?URB?3YtS1Z-NQ-Ymw$fUS!y;{K z&`5R%`qZY~B{!JK&SKMz18zO#2X#)0x~9&IqI)nu#Q{P6@K`AAIZF`N5vY1?8JoeU zL|c}MzZb&PZCy}=Bf--nKx5aTFA}^Ng(-4Mp~$78GrXDFy$CB1a4X#-u3ZZ?H;}{& z#c+8G6m@3kg-OaDg#U+R3I>X*giUh@=lpMH{a zMh=U4dD~pn9^VoWm4}nEq|&`$tH6><_j;)4eXQ^tEcs9|)lQV|^-wX@PFj&D4`mXi z)i@N1;v^R@-&u^pj-%`-ilHB6N4+!aM*)#{7Q3DAnF}81=bgpfd%e_aGf>z4P$<&R 
zJBu&A)%Qd0)WeyCRPGeT&=2RKD5m34Y~kT-Jj$JVI2%tp{XCqFr=5NtmV|WUA+hRR z#V8=5j0K9JA7w1OE9*z*=(~#DK@uxt;a$aDE@B=>G5x%&xHkxOuEwJHqJ#0b6{d(I z^7`Lbj1KgDl`w@&FjNOAlsQJco%>+S2cyjf4$$f#7Z=M(R_Y3@gpzIzwRhK%zN+u< zF=lnM-pyx)No5`@R>^tO7lSZVf9BAlr77%kr1>oyZXz=aZk@h0R9Abl4LN-ja-_i- z7p9r8ekZq{!dh}_80ljvA98+KqYm`s05ipO-gJuLeZ_8Rdn%MfVm-Npeny*2&PoAPlXOp?x45njN<&`Y;CAGPDmAx#+;@ z?VSwm1I24y7=yPA?E}RdU!)JAX>6Rt_;4}W)Az=ln}e&RohK|IM@uL%DqTJ8J+KUi zvN`a~5|Xi-Ym&E<(gEpXVa85wpA#u0lmUez>t$6s+${_jq{48wFx(Gko1D&ye7L9& zhM31<3&Z`eR^W(@hjStyEpji$t?m{MHfuYSvysTRhHg!xv#E2AofM5O&;V9IqYE@X znxUZsC?73WUDzy+xbQ`^`lQO!7C#@UXt;u>6d+K2F{N!J@Rgr50XD<=`sVqwllWwC z{WvFw&)l?m@qRbWNKk)F*&=;SDwc!GON&6rNujUY8H&+B-~aMY7(DisT@4=uBud_fnYt(y z%*SfmV4e_`9_|d%ixZ*>As9>`L={3jkrAQ_A)Y8!gSAwJ5Kk1Z+aLdyBgFP<=`)fD zzj84`g!zI8bk+Sv&VANQU|P95b(GmH`O#}#+u%2{fpzUsNjYIV1Yi(>0NWwJXEFk8 zhX9|+mhg56@EI-PH<)DZsg^!lhA|ELR5 z$S`p+ig34)TfjYFPWJ)x9*ot`7X1R48(bIFsP!8v!1;f(0?hE5@1?96aaran%y z&rXZX)Ug|r8Pd@SoZ^Dk_6nY7wbhy?YkB5e4XX%6mfRik=lvebU~6{C-kR`n5(4Ff zksHasl!t&=u=7-U-I4F4tobTL@-n0Lt*&lH!sKR6z}eb;_U1?9H94jkT5zn$VZm~d zA)se&$mXp%g32OPMuk<)`q4NC(FC$5(tgb)>m}ZkOst5drbFBB^c?E)>zR zzgG0VQHto;Un}ApeM*wQLk_xc6r&@3|7Rf?a&aPOL3owK?l*JEH29S4gC0qLmiw@K z5BW3xOC8}x5a?*v#?D}!Ykk@AN#{{!QZ&#sPs1E85KgSit&`>vIIH^<(#zGnB4nhw z5wAy+E7ETzt|A`>4oSb5@LF)Aq+=ITRHpMta_G*}LJ=uOn--ztH!?cPFZzvQ_w}JN zbo@r~fV;W_LP5uG6mK35Z=vHiinkBN-*j|DOg&wUZtVNh%&hfH20I7i^f|U{MN8kw z4`|MMYgT;c$bT_&HY}Kwl#2T>;6O@wa-YsfDYkk#ShO7$_j$U=*-OWgiu*iWyyAvX zgp^Mg_g?RWI(x%2Mf=h+?5VxsnIhkbwuZJhRXDU+Upt{@E!~D9Hzc!kmY4m)AhNVtwh#5M; z7MD8_f4#A+M9C3kO!mYjDjTn=Egh7y@M&ZleKTnNn_9&V~i5+;l4UFd=WesSmU^wxl`<{ zLmGhb*t#&9<;?)p?p{^2A78#;yJ-{)W#kBqm2$^PzN&FB+%U=BYc2hj4E{O)JU7q` zy5nd;?O}{fI|o&}PbnH^8=H$G?CD;Hn#V_bAH>eNRh{?S?M>;%CGX$ru%iKQE_J(vYKpd=xSI58DRHd#x0a$UTx_8jxZ+6q3LEFd@T`%wpj+m& zxM}m&QYr`((#;oHuvWanLwuVcRt@4}jgL{)nr8$#00Y~%Wxy0$%ohP>MO_%xAvyPs z(sOrV6aXE>Kr4<%(Sg_Yc?&Y{D0yw4BIABX$zK>Hx8cu~Q1+jP3!^%uzyDmx>+h7n zgQC;lDMFN=E1|zj4ezX09xAnb%d6pX@jX;(>xn-g?^37K@OPIms((JRWnv$>3cM!% 
zb1^pYK5j&}jr-+`4D|Xs`IRAoR`&w1e=1sK3rd<@ws7z#Eh@n@%_ln7Sk*D?)FKOk zpKw@DVWfq|xuAK3MI>LJOMp$6FT&Kz2EM2ezKbAi+WnqV^np^*)wX!`drDl&as_c@UTc2dTPkhs+nV|* z_>hA_CJNeG1$w7O?g5kk-qMyXOOa3wDm+s10c0z?2W=?`-Nf>UR+uilQ4PxPFZtV^ ztV|0YxWKSIuo?u0|HN?-?;kBim-c<$agmIDCd5ydOk6~j&kC|VHQh@%Sw=)a(%rYll`a6>*NigM>fN&KQ>YeUx4IwWDybK4pOoz!W zisqc5P8IgyJ4ZC@5sba=*69vpWXxv=GJ>?`t&zUlonoX2^F1o&`!}+(J2jJ^C`C8* zy|rx1+kI#*OiXZ75hu_ZJ%&Udw-euvLcycs6aoudQVsm2jo6aJOm~(hl*+y)VuKfX zxszhjR@O?L+n33fgZ4Q8;S9|H6x~e{>x*wDA=&>6eXVSj^rGp1aaf(Z2;J^$Hi{nz z-*J{_i(5O|)= zT@*n;mMD|XmLmWW8Ve0%9X?N&P@Z**>M?F{*YkRtev2+>m5CpOWnQkaeouqe@LaO z7Rj%bTDRDxTAl8yt6yhl@3Z^4jQInRG>%ldx_hpu>9TLEB`fOJOJv{dF6P34B8y-7x;AlyGcdRS z#I$yt!Y9|cywac$i@O$)zLq)cdNUy_#a6cj`FeQGlmQ9P`5>0|_6$EAT=R;s^YPk7 z@8vQm5!dy6zXqz8uTv{7BR+#?8E%qFvgdS*lk

O&vI%HzvYrcJ@c3U1!lKF=K{L z35Y=6vH_~wf6Rlaj1M?nt|)zx(?~55u=b`B$y^r%vxTZ|O`6hUNc7{xj^8_M?!2@$ zL#eqs%N4qoPR;A7!>BZRC|eqit501~j=(&mDs zd+w>%-ThcPW;~~JLU4r7h4=TBLyT0Ipirgc^hk$PP_6;7WJAN457?0`g4C}s&nUhM zA>>jF2BOO{!3dv!Zh+XY>)X@o*Ts9G zGsY`cbNC=~&3H;@;Z|`a?BO%=LT7@;yla;LCyV=q9B$vCvGdgGvO7;^dkzT`O2}Lb zP)t9+xXQAEa{f*ZbcS#e-?1fB%==+e0()z9qit=U@DAWoFPrwvc^rM(%HYQ}2Q(1# zFwi`nQw>QxGSVDz>sA?-WWD>Wjy0=WE)m^{uOk)Cd2g4EcsqIRQdE}d*?IC}Z3(k`~@a}%WHS~Lu_|2o)ScY=U*!A+#8DQ=U*yut;g5pukDnk@YNFj+W(sQYu!Hfym@PbpWG3U{UU_B zFOqtc&pa}v^kTkR8;3S_M+o^F7;0LCh+oZ!D1XCOOFs7qvOvVImb|ByB1HUZ$$M%k zLd36@@YLRJGz8Lp9^zP1hpqi*X*@xzB{G)J@p)VZ2$avh077{nI?7Xo z@_!vDPm$jLI#8Y>l>h5Mc_n9k=Xs+17E%5?f%25VgQBB6Md*CZ#qWD`74Mo#`wCMV5g zd)~yPHc@vDi=E6P37<7jPBC9dsEqawkgq7z6c*#QH!ff2H9K6&z&b;}f)*Jt=*mtW|*Mnl;udicYXC*!FOzc+T^BtVE1!*G+yFLSFVi~!c zSc=TngMpzbLZ%1Ho}no+u@44@rU*kn7#Nx&4E>-OT0FQ_Jn`^z$b$d`RUigbb&=1* zfd_eO{dpdw2&xYU9^A^P-u*oB;8yYAy8{nW0uPFg2PwjX?+!fJU9G&g-16@{4=UsG zy=C%}{;Q6%c8doeEtBz{OAQF;JDyht^qoB4+~>Q2^$xJgPI8MJsY^#JI_-T-$&E5( z?G^~WAt7IfzZ3zR4I%FoN1QfhNb6Qqj^rppJhRN!rhq914p(;6mKqCzjW z757$`0>S>?3HCv*QE1k=NE$`kLcR$R8vZ9IyuZQGIWE{~&f>DgaGSLADXPZB1@5!e zGN4_c=aiB&Ny`yN{xjZw+hA&8UFO!YE=Bn9(Xh%W!U&IsRYnokeYEWJ`YE!?9xeO4 zeu}KJN6X~(Yn63tm3{0vTx9?RRUigbwaPvgRvB-tKflT-g6hY@D(hxck3CPTtXr$> zv9QW0fd@sm$|$nR9t*2%Teb4>a?9`gRi@P5kC)pniT_BeY@1fur?tvRWEzo!3j5#t zcb?Zp=K6TP*O+%1{r_)?F|Y6~6YKbvBFy;du*4|B1)mN}j3Rvd>9E8ovcx_emKa5r z*r&C`o|ba3O`+1?Do6iYxtJHto@sQ^!G3p&dB1sPcO!GmRfjw1j){p=@>P&6!d?1q z@~267NA?l+vkRXVoj>JvSJ%G8YbOoYd4O{6wxh!n3KLJ7bW>0GeC%=v4=>pR;C!b1f*tWK9uDRIST1ev-<4!z z@t4Zs2DAiEyKKVE0k@_pJS|AVfR^r?FZRGd*>{p82nf~uPC5GBvO6nI83y7(U?2&; z-zod+4tWc--zi__g6R}tpx-I`%K<3DK)+kAxbsRBVW8hFZ@b!L^fqbZpMMTzGyp*r zhyhh;aCYm;DZY3r4MZQC&`zNvsTRYryS;>9R@WaM zBSJNyLt!W$I>zeIPf-A=T-u#GZq{^{H_?y$*$%%9?7@Q`#O^&#To7q-m-I7?e6_=k zrSlVw-Fx;pZ=rcz-QqaSp%~&0gqj)ssL@kdWtPEkmIbd2d7b+B<1!a2e_lxyRb4s! 
zrE>ItBk(@u(kkq{g%=oWE?dh2M|Pi{i##$IpNXS%wPt2&LFWT*E$c9a6(pnH!)-6sk^95!WPl0sanGW%I>iT`4l*vvmPc= zyzgHoKjOcpZ3z;u*++S@w#;X>U(aW?g^WFV-Rgb%-ZvMV*I$$TXl{_P`wI7N-)7H% zYI<1;F(K}k%CY+l6h+C>KP{K~O;A~x)yxBTJ+M7sk^X6UixZ0W01$s#?zuc`!kYfm zGIv%gZ$>q!@Mq=HRcQqYGah6x<*E2HEk=n)Yhg_|tCl_h=AV_h2}wcRs<8ZWIdYd| zQUV@DCvhkualTyc*lw<4iop7ExyQNGDFW%sWg_PGJ)x?Q`}1b>t(DTosXL zPtQ%!~B@s2fZ*f5$ey&J{bVq4S9b=k|IO<^YS(N+Np?yNPDszxf9KlAQHs@ zNx9BXW{^aNC(BiL8k)*Lda}&D6b=%lub2JRyVXbm4T=Gh@+7}r_Gw?d1=82c+phA| zS3u+IWv6Wd^}?OK8Ft8h{8 zx4YROk2n0Ciz(|%wl2*Rx)g)2g~*$npmX6vLU5!Z@^OX6;ZU zXEKu{vvc!&2*!`ZIt@3v;7@S#=~DRl*w!Kqb$g2k6#mKfJ$3UVwSDt@_$MJl?S_5H z0sfo3#;Q&*q;q7VZyXrAoHd+W*Ki-Fu4t=<+yGf$NOp(OYh$Fz)V}ErO-%Ol)W0#*Hw|L*3_vpyAtv>>wjP(uKfrfb@w@cIwHk6;?6Ol~=CO8FEVQ)kdk8Qwj~ZW05U z^ONLpDp7;mWTa}QsUDf;ldajNQ~LzTEgH06yJ#;Z7p%bsX#9nCs*6{BAgDiu*X)-{ zV^kC%M@SJzxA=gqQ47x4pPUG{bq4UbP=YNcF1K@A>MIb`>)UZ=}AN?p##d77`9m;Gz9Q@m`h?0K$Hf=u~q?628HjI`&z{Vt$93tdF zn3p-fGIR7YUh?-dridA!Y{S$F^a~~h>wsET_Z*+0X0hUFS!B=i$%N@=F<3E&bqLWh zjr;Nnz*4g;>ojPnSb38iYBB!AV&tM3rbVe=m`Zl(4D``@4pb8D^c8Y z>X@(awPfQ!b7hHf_Q5D64?Ck|Uk}{w&LMc^7##$q4<*DYeH2mT-dFK@KSeZ__f>ja zYAi)mwf9xHlV2wcw#$lqq!N9*QZzl4GPOA=SB>A;iq%3{8qOyxrG0(RRFaXP(Mw9` zgYlBnUUk2qj^-wv85XDO(~x0|+BVD}^#WVm9oWD{0F)t_3N@1^=Pjd?Y9=M6B#x`Df0O1Dqqu+@Mgm@Y#KR#gwvw)n4^&0tZYh_<2q&A;lC01fwjPosN;1_JvQ#Vpg?a{ zX}~m(4Y=QE;Dg2!R7b08x})e5gn~PuZETT!i!!j9iQ+&P=g#1+Kh#&BJmZ*%8;Cf` z?X(B$IY0TD`II#>9x!fNtJC46a-ZW#cKeH85XV8d<8N}(-KldyUfIPcF7c30$;}g? 
zYAi~xlHG5MkR{G4bglw>fIarfivRwRJz(K}va;Pdd+h-W{gV}(y(tX*pP#H;c3s>Z zw>hBt{`n^>m+y`LAo7bHAWvwq|0e!Ecl1z`!rHK_1(Y%?dyPfN?dCzxI>^LG4v-wm z?(2RtfYq~$xWjY_xn5+7D;{=jFP6B;l}}h8T%ymMNEl{-aKx*rgTp>HVV*sO8~y1a zB(qLeI5ovjtwM?6JkZp6R<;6|%n+hSI5^gJAk!G5K;o`w?00T`X&!0nvMx$D{o$NH z&raHNtNfWpGxEhwaTreMr8-R+=q@)8M9t<--is&lb0`1B6ZyH52jhwS z+{t(SM1JlHn!6`7cOUBXbH{_HE2TYsKi`?&Xg|9Y(86Uv3}56%I=emPM&65@2z%gAW|v&Hhh;OY|#0woo!$c-Q7NR z#^Nw;*`#06jFm21pIWs8VXB6uYR7%J+RJ!KwkqzYJhqmctZg{0kJf}~tk0&%Aqmr^ zOyoP&zC^TG@n}9y>%J#)CPm2jLHX|5{AwIrTAH(+JFQ^s=E$W)Sb3R~-8!Zq#gY+_ z$gGZ68hcE3_*C$)ng-mKn8IcWWu934$#xUd`oYM7=KBwDf~pV%e>j)L*hY2wOp_Lx z$bw;#wf~~j*o~x0Z?~C817E&JrigvMfPIZ`704WtQOOLO{_hn&XBY)K>BfRAA(sKW*IW{|dmr}0&oeF+=<0&6Vg2VC`ZsD4gt6b$x7ZdUdthfpK#30S9eS5JJX1nm8= zablU!NZH%$6RL#VaIb6(B&1&n%?m*WrU#CA!Hf?LQbZlUx|blz{i1%@{2NYkdVw6^>=W5`WY>LEd?dqaupK>J>A5=H!Q=*ix`G>Akqp(K{H;o9Z(2dFa2{kZ zURnlQl6IyZ59p0r5DGcW0-RPl>0*RzgbVJ7%-sAE{I+sV=M~l-D>7Lnqko-yT7ndf zpDpWXxPCm{UxVLMhD@{B-~+j7%*&39NlvG|AVY0;Hh5XE+`E&UhKa*U7;|%FI6)s8 z7u z9`!JX=^9DvH9gZ^MG{|0YcPigXy_yWJu`E3offf2zgOwFB)kR9?^RylK88kR=InbF zK87ZHv`6;nKUAW(w>o>2GPVDq68FUaQvu$ita07nii-MUd2>|TS!zbQtzq4~t-;iZ zD6^C76b8bJGpM#L11M;tYqh6=b~38k6Zlc?!0zZF>@wzY(9P_ zNqVWekX{PJ3~%YBZ*28~z-m~ZJy&E+!26A@d|vaMM+zk0)LOc{k5gfpZ$Q19CNBnQ z%*@Dx25adv2dTquEuYD>LP@t}=@?!x(kKdA#-whF%^@!-M$Z}Yl45Vl6J4RaH?{f? z!fG^MtbIr=d}m9TiPeNIJefkVK;Yx#B$gd}Zsb>Fi_Z9hXxHX*! 
zsJ7|#z|6vQpB?I6sJjm>sUhw$|EYLax9`gD1Kd?Pcp@&zn^L`eVEtw2=0F&ANfNzJ z4ni$VF8ptK(<-T${yG`6no!F%HEYoT$|(dyZYfqX_F zyAJfY5g*pI$tgO%qv~3)!$3WFN8HIol9#e#UaE`r@>lOKrI;7SI($rX4jKUHXp^bt zE{>9&HZ{-~1g3V$%+zUKY9g>SHqk$fnQe$Mw7KURPx}t{n{jUIdtrPB!=!po>3HAE z^nJx;oKxUFM;>U*d~ zpkCsk79G?}fcj#f!U4Udm-k)AQ0*|XIU(~S)4etR47O=Bo}{H53aHKip;rcj)?Do6 zLoD_i6ngi&fpI*v*n8748(7Jq(0lWU4UE=1OE0$;u$4+W+-m;;=Ov&Zr;2r#lK+qhoVeJkcjw|l#g%^kLl|L-58;fsx@D*N@xhOThY9KDS zqlxT}N=~IY7I#WgpYb;bUmV4+8T_ybcc}3Zyr5iZhGMg&#wE2YXLL#YVtwJ_z@GHu zm1@&6)@iK|yT->p*#6(O$wrczm6n&rr)|G&XTR-#E;pKckfw<1t);oXyOLA-My4s% zYn3G|VXlO*U}bvy^Yz-cWyPSA*B8=V#QTfb+N7^oaDH`3J3Sd#CbG1Xx3~Ho8)3QC zi(rh};#R-6VE-1k`dxq{4#llE&6)^qabj&NyMVSD71iqaj7emEc9-U?6K$QL6Y}AC z?zcwae`q_FV(pKyx^1lii-%WVd%zZ5-NUwV@yTf;)O?C%a zuj+*4&Ek&4@s>8(9oODz^zobQj$QivPo-5wwYu$9MlSfH*LR{lH*}&sw>P)v_U^pg zI?pRh&+v_{=dv34qL`6F3^_{dzEx649NKp%&@WG~3U67u-v;7iRhfa3-PWVn! zap$%A5>utM6{sxNopd7YiZ^|w!`6b>R|=KSGx40q(1dR@75h4z>cEGYihUj`Uu7!x zU3Ty1LMTb$g{k+ z{wJS|$`fpygZ=NsS8|H@Ph&BKB-kF3?@h2>Z#{8Yw=OO8+GaTB3e_9q66f|-dSy~} z9JqX711Sfx0gPGIgNac5*WWF)1XK=vmoC*#ySybY1S+Olrc5{W_?8~lb)p*q&-A>MQ znOBco;Y#66OeVH_^VVHhWt2K_YszU^?2L3@HH~!ajZ$kU(QhY8w0}oeo!rjpn(=B& zRQ0c97j22!LP_u1w}cYSws5(d28CCrTc)aTBC=fUZtHTt!F-z^RKX2OMXScEDjy6J z4u#fVWKb%FH{1Q73auKHO5sgUq>Jh+v_ZE(UQ!INE{SvuORvxrka83?Chp8b8u>9% zU`#vwnCv>e;toG1N;KQSm|Qak((5V6Re3b)^%S&=^NPKmf|O|1>nT`}b+^+lAZK8a zww|47Z#W2%L0xXp@(vbva_-FaMxifyfqx|ft}p5gC7ShdM#E?&mszo|=aL{x{y&Zl Bz(@c9 literal 0 HcmV?d00001 diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.proto new file mode 100644 index 00000000000000..0f9afe3f5bc8f5 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.proto @@ -0,0 +1,33 @@ +syntax = "proto3"; +package extended_protobuf; + +import "extended_protobuf/meta/meta.proto"; +import 
"extended_protobuf/meta/meta_field_options.proto"; +import "extended_protobuf/meta/meta_message_options.proto"; + +message Department { + int32 id = 1 [(meta.fld.is_primary_key) = true]; + string name = 2; +} + +/** + This is the description of the users table + + The comment added after thought +*/ +message Person { + string name = 1 [(meta.fld.classification) = "Classification.HighlyConfidential"]; // person name + + // unique identifier for a given person + int32 id = 2 + [(meta.fld.is_primary_key) = true]; + + // official email address + string email = 3 + [(meta.fld.classification_enum) = HighlyConfidential]; + + Department dept = 4; // department name of the person + + string test_coverage = 5 + [(meta.fld.product_type_bool) = true, (meta.fld.product_type) = "my type", (meta.fld.product_type_enum) = EVENT]; +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/messageB.protoc new file mode 100644 index 0000000000000000000000000000000000000000..a6b89272dd2b3ddc5e30b1fda8086ea2409fa172 GIT binary patch literal 56430 zcmd_TdwgD3dFQKlzZXecvMj&06Ir=silw*0Y}5T08!~;`pligUj`WsruBJ#ihn_ zV`63chO_nMn*FEW9#u*3mR?XH!H6BMWTIevbzUhRzAPK}P78X6tHr@N)v z8W+b0j&>KSZEP;}NPsMW!iKk4} z?@bnKllRwV>ix-ymHFj4=$A~^7Sg)YjU}txtevgLDVnu~g~qax&396ppKqLFOi6QY zVFnP3bM?u3(wL?%Ae^AUYltKyqe2Pbkf|HXk`3Dt%*iszFu2s#^Y39TTvCPYJAG|Fb7RD;(2hf{)*+6 zLbbTL+!J3Hm&z?sOSRY?ZSLD)B%NKENCu%}<80yqosZ+rxU71#>WDm3CcOJH{3oMl`HO72hyYT^jy$0ahziOT8^u z77jdIi2Z_q>dekT*rA?GH#AewPb|nJ!aJ{9nbUM=rn7m~Y)sZ+(S;d5#bVior0&*O z%+yXU)tOfy@i^CTMQyAq)5Fw>sv2i+d0X7U$O@wBWl@*mPJvgKMeU))NG|K#dv8kyn$4B7!t>~S zquH!2olnF$thwnqEy?L6CK;w$hB<1B>7Z>a0)JI`tMwddEPc@q>x%-f`l4;2M9+Pf z_V|jtx;onGUui5?dlE#E=Kbm|RbP?9H4#Dw#)^2SQb#E1UDxJ`3X`+kxH51%dC|0kG~9!sW=nGs|o@JtZ4`vTpPlt=z~vd@I|d2I6+f7YCti5Z2Rgh#G65w5gc~(b}vp zJuo*}@2MDoUF%R7bMM!}2<@taV4=OX$!6BmYCx`C2jqzMm$e|WcGX+s*2bc?!)8w# 
z^V4zQ@(n=D*GF8P(NMWM9CtSxD@&90Gn0*}`WZ?6o^}JUa~*&&t2ju#VXBRGxrU+4 zK3r*@Us$d^*wbko;!AzMT8THW!F)p&yFD)3E((K*lbVbRku_xP9}0Xls%E2I zDI6p4CmWM_L{-*69&NXVhiO-RS;lIu7vPi=ZfUOLw$pK!^qr*# zAh8=+yCKnk(UFb%W^Q1c0u(o{tK;;+8IxQRFYTk9q1H*HS?}xPuGQqJZi~y!<=WD+ z3A*yAE1)(O5oRXk%A@*O{TJDQi#Fiv)}gx^^4f9t-WG3KHGt>ab>AD}?HhuwzAE0f zvM`6@xP%5Nj&-ZQ=V>QE*7Mxyd~Ju|=+;#r+{3-?ZC`8ceu%%Q?>8#(wv7bOhRc`b zk5HVbFR=#{5bnNI9f-^GH53^{bYXYbf$P`NZlfOhkE_Y3YgxS`E}4)o8-%^j4}zBC zsI{wJ7PqUt=}lb+>Kx6p?5MBU%lY%B!UwC?r5y1y-BE+@FJ*m!trc}iQQ(5n*#N+Un-GWBi^|LuU?;oIZ4X2(zjS{ag=@93L6Lwo1#x zC&zESrMs9mIPD&k@)d4A&|Q{^ZIW;~!a&q7}v`e3j4*lTi z;nAVP&@cDw)X=~<&x(DM@wSZw>V``w#|u)jIu5Mo1%tJg7y7;&#alPfQ#S16&baJ& z*=@6X*0Gxa8E3EE16;ejs^P@pdR0Rp)*@Mb%{t-l_QzW{#9&S5{Q|j4hK{B({{owU zHqEu0!0qvtb%36?Fn(hp?zs?gHY9Z>5;dKfNW48;^Nxa7ZAb!}gtcVs`-MV$$;K+> zhB(|2SFrwP8`{yd->=`hPT1=;^d9xC^1HpmbpyT7DY)w}eRC9V-w^nQh+Yxn0$E&H zcDk1{GTQBl2`@?6mF4ukaKX-SkF3H=Goosz={RuZI-)vc*5X;cG47t6pTi4t2CK1N zlX;28g(y8(p03R|>!X|9>#^`wEli79%C*qGlC^NJ(-wU{)gE`q*7BWKuZlbGt36OV z6O1ZnYjx<^Dd*RrlWkUoG1iyPOv>qH%8j;(_R*@|jp&uR!}O;5l6b4NIO`okXJi;Q zd*VE1TT~->jD%4WweSjSHd9}yFV&XoXYedA?lX9D&dk=Dvpw4c;84_47~P?YM}55^ zt2VGObr+SY2jfc&3N(a(XC`OsllPxlS)P7r&t>`k4MXTcok3MOO^xbU+}Xe~IXn0I z`Wf6Awug2dSTCA#6zmrz)!+p7$ied1siC36qa6Xtk;YOLhMH-l1nw}Zwa+vHc_G&1 zwB!O>T&V> z+Ge-K+ZJaR*SFi7Z&#(t+J@JfWxiC$PgmwIzNu zdv;po((=*@R>b7wki8tRch$Y|md3<=la3?t4&!=xu;&^Yw2yAq3**OA_PDwS*v;A6 z(xNFpO}M?@bFHiDo=XR^$6681b92+n0pc#J)?v>)wB2#HC}B$>RNHOUx_F-N7sX9A zwJKbvd#qX~53?R`i7!zl#A8i9++6K_R$F98WZ28B6?iVKOq@>{Yrn%&&jM?`Xv%rt z!MHQe_i~II=cACIXAO=V9y&8N{OTce5md9|!{b9|Mo;5JKQYu@%qjNAN^RG5?~>Ng zwc1jv{y=;w6f?IXp=CxlyHhJl?s`KqMkKYD-gC znR0~7Pj#j?DW4=OTXH>xa`{4I%va8~p8;RBx|)3@@Y9BQ_C>osTU&&GmzU0)e%?0P z#)Bc1|5?l=s9EYR^WSoJh5uH%TlsHWcRT-W?~ePvRE#^##L>jzpOHZ{DeJ>1+I8US zb=y?FcK@I@w}Ta@E|0pFa<#CO?q^jSJx6`1dNi)w*EHDLjk-4U2I5EZnDieTGvMvV z#?G7^89gy@eAG9u?ubkCwb!3ty$#x<=Q}MxkjrlM=3|duyh-W1Aub!e6g=_xE;h{lRN#9H2iX(!SPf7-YN3Q5$*y7UL(x$?t2j zF6A}}bNr%7fLzyo|FIBv1s~Z(60*u9@QtV)wdu3!&1piwzvl 
zljqOO*B`(`(`Q0sy_<1W5BQRZxc_YwKsx`FcyG47%y&aB$M zPKs~Pre9jbDLjrosDgF`m55hQ&!OQvtK1njo3rO;1y=XP)y0O3T;rq{BLy+$Qx%5MA*)+w`?(in9jYc!oAVgF5C}{5}jCQzZ4xp4Xr`J0y=r7L}G6?>1xZGjGQjV(YElqqw=M(B`jdLSZ6Bb;lVG3T7c zMxrFbz11<-OkJXVzkmv}IxK-T(>C95sR8V;GI@8eo14{5`j7kiqiM_D&yy1n9<$w+ z7HiA12CjT4>Mo30S}TKQnSBfy9yPO=j z@y3_>2FdWkWPg$j%+DwGu1VzIybDwPaWcESyx2T=1IQzYm4(p^)POKLf-z35kR~H=hDV$s@=wAe4kO5(+9rsX zagv|gbWkVjOdKtRu7s-yyzRjTFr^Emc%;?j*CcLX)KU_ zl8oOqJeG`&92tM*!01rIZ}Q1r!7hC`Idl&%hmygOQ}>JxAH8clxohP3;i1v7WZ>js zo}MJMaOm{-$mm#{^bL&Bw9noQoV+I)dex~>0t?B=Xfk}_)bU}U0e*A$fcMTjr zzIvc>LT=p9sIbouGdVN_mIDNJ)D>Od1JF@a8x1J?H3;!Q@c6zs!D%o!%rEGH>IO#l z?DIg!2xlnoZs2%wc;Ey9n6YH{#i+u>44xhxIzc;;V(j#xF%tJqj}IkBM@9}CvB!o+ zUokv5G-uSkb{|U|;geyM}nqM4xoj8W2Hb(hZL1tI;96!bE4|Oim6R zJwALCg?32zj0nsthsTEYFk!>sc2qD$=2s5TDU3qL;z;mfiu60jx)7P%#bo$MGI016 z0683L!vSN%o?U1(c6#tGPxJoxz!#!qU;ESVfj~Hv`;ry06|@sxMa0~#R5QgoSVu&PJG59 zn$ALVUYZFK48P>^{COoE))J`$Yj*QEl_Ir;^Ij(AlNEQ@`^kI@k_Lai^soexmds)9 zEG;+pFfNznxu>N^zuKzB>st=T?QzuB;eNV@`&#zdLsGf)(>>hZa+5uzk$$>|{VliI z!)vQW{d5m+Xt~NBUQ;dVr+au~%T9Z^vs%Z4G)o0b(?A>E z>Fu)q5Tj6R0tc_Qa6GPFG1)&Mc8HVO{0#iCJWFQfRAXV+a&oS*bbm6nBIkRSCAUZk zlH3mCq6r0lRoae{EGM%>x0b0 zL(x^eyONWP$aZ(F-mnV98OUwg393z%d&)=MNm1s zrO%^MRPWdBieGQ2(Clb*UGLeosK_h%{18EA;n?=7(tcCeC0WnT%@8G%^4wUMKi{7m zZdg7pOEAT`WSxhpD}nRT6leIfcyvqRaV~-L(W|eGZ#A5$cr4o2%Ql0yzuZ{dZ+;n< z-Rh-lUIwTAmFe$TN~1Ch=2%Cs?}5DH;<3wig%bT8+k3sOx~KWLzSkgv>0rMMzHw0Lk$^QNJD3298^#U6N%T0`GJHNLRWm>>rjd=-UH#srnI-Qb>gAv8k9YLbR zkn42s-cS>Ao!;Lczs9J=j(JaXW$!85F-`Y51Gks03D!p&t+G{yVwfI7k^F54;6YZ zTeTHByXKbzx#R$;r9iZ3Ybg*7 zpyrnYH33rd%fSkh(e)m_#OP!SqzI5nFCwT2kjcxg3pD{Uxo5A(gl1D-M1Y8u9LrDw z$dnfmlmIg2MFb^)OkKYZkkF)vUNIfrPaq6AK#7{uaSz8ODk>=zf2MRrUni1!a-LHN zCj7hNR`-%FXG)!MT4o|tOfiu}U+i`AJ&IZZvUtt4@rNZx#Z;x0=)q#|I}3?rXh|#t z*Oi?Va_OQRnhJSc`KKl>b0^kN2F8OTa}splH>v}*Bz3$P?Fp~wU2G*f2g9!R-{GQb{4S&}kzc_3xU zf|l&L3$P?F0e&tATr7Dm2V9t*OIeco06&)sAGXo+kyah+EYdc5KAkyjqvt!EwB#LN z&hKzq0VRN(M=SVIo4JU6^bOIqy(`(oxZO!gvHXFvbCa_h%ukq}jiy58-wZ2z!FizM 
zPjZ`*>=x(F&YMHAsN$_Ji(k#IgA!YMxDf5=J&_Dr z;+Z@7<_<9>lkTXRQ#6dRQ)V~)-LwY3m#xN2!j_Y80=}uobzFuSRU{`?NOR;(#qB-u z%}}dUt-QI=@{K~F_u5q(Wu}WTm9=y^#gMp&sZ=e!xzKhwQmn*ZOSSTrLi9wTXdO`I z^;-(99q|}4(ph$;w-rj;dIysmb#yJAZJf(VeQ60lBHF>o5^h7iK3AVxa>gw-36sMi zPyF8X8tKx~RW>8_4Jf?;b#wNBalNfTaIedEONE~-lnD0u7OWdR=*k|@;wKB7-a6-7 zQ2r@e^v)zF5j(G*n|U>-8HtYKeV)Prf6@ok5-+w8R`=JFn{Gs}TE?hNlGE7WF7MdV zrIWd7WKC$4Q+Vd*?nejA4F-Xro-@=q7RUoo3xk1qei|C4xJ$P3ARB&>7C)`whrz&` zkGx=mx%t5JAB@<8p0mM78G9r@7?}UjLTP(C7#)1%L3j3m7LOLTZcB&H%6cr{g1t)* zvKAFuJeF_KS}i?}BuzoGQR%^^>;WwvFLZB7`)I3{-jOFU0#Xk$61UOf9U}3U3Z7xw ztEG2~v;Q-eLTzUMRDGU=L}|Baz!T0sy3|-%G?i2Aou%EGx~u&yQ;Jz^rVHQPpWM|r zM?`07pL65gOk6--j5H#U9)oQz&tWODMJjnuUJ7%um?bb{9V>&&$CuP24tX8^Ox#Pq zopOkGW#RsX#<>MA!E?NcsMR2|>1>A#?=EcF8t?a1h^wXd6iS!i#W-GHm|33Pm<}o7 zu_aVLo4Xi*?=hF1q*-Y2}mn(3^T z-k)chPFnCFo6t^LyuU!OF|^oJEqx#_GB(kI2N~0BqQwUa-PyYAs+K-jC|#Nku8S5t z*pfY<#Rm)9q(M4qxVc*T`FxAbwBSM3Vlyp%UM+U{7Twj-FEEb2OC?~MtK^b%+yx0o z8iQ(P10)NO2MjQP+EOij=y`$K0w`)`ptb<&LylGw`h{xcBZcS}Q=u>6`jJ9wCw?sr z6P2f0`u7a8xA%fVKge=ZmsB|{RvysVzb|YLL_^1^mj36AXq4T9tOW;gsgY|z`N@0> zf#5;bLL&A_wa}hf622cVVESp3q=fXO=yrCBNUV<+&_4WIV12xR_2-KS=Z_cAz#JrA z{!$^j+`ptt6$2!cIf}S~TvLtXmkL`h4MiaR5=N_s1cv`1!wK6$#Q+J@f?^6u|n8M%_R4c8QK5K3ykZG$?EL&Qu6jQm+1DehLbvp%e5M+Qt&p_djJ+9wMgo5Nd% z_Q^u`wotUOa60=}IM&3_X!}$lN_syOnX@j~_`wf;-fYY(X6y&zj5rcg z&T690PlOwh&#@`##4pFbNUs z$-pS7qfaxEdh?b+8Z=3E*KHl%`|F%=rrEL4%T*k%v%rh5)cHO!e4m{m!J}n0T*ed( zxvC$3o{&p3=tghoq?zoNdO#?nw&+Nsc-Ohwd8W{HCcSeFpux1hLn|>)vT3Nv9WPIA zI`C5CM-LQ0<0FShcDn%Mo`Wuqxc}B$yxMzbATW*u(JU=O@6TquB6aMu1?(|LZ+r<9 z`>u|E%n2WKvCkI>K|CONVj@F^pj(P|WKwIf2G^258#usI$0A8A6_RG`wH;iJElR-R z8+Ty~`Au1zvQu>Y)U9lE9BYxX3~mge5r)vp5I&#DPw9uBFKh{78&3vBq8i_ELqIwF zLk;1XLUL@3n6s9X>qk;So?G3uQFm@lFI5#-SrwZ738q#RVdV{e-beLd1*AatVE-_(o|e3h6w`$#S6n9{2(*RmF@yT z!<0vu2;bTI{2<}OlyGU zUMcCAzOj*OWza%nr7tc`A-2uZc8zR{ARom5It4vFaw~{h=^hu? 
zAZ&)Zy+xr=1+pD?FE`j-h{9vj^0WctI(UgYV5T*}B&XOIaLe*N#$lmej~eQXz=7OeQWTkJJ>d!+|3f z8Vmao{1{DiyBaS2BaX!Y%uzaPsqv;2W%ik}x~6nnl^1$xS4!fXo0lLt$ApCxEt3rA zh`kdV36o$u3EP1rwxnKm(-S(0F$dP(!qk2;wicy)*lN#5yp1d@_a>8~jBgw*+(4ty z_S!M6om@uQY!HiXXV3*cpooSuZmXI#m)CZ*vRhJpDY43w~g^96-pNTM^OkUte$Dv}3Q%YwzkO}|? zU6~O!5{Y5AM&@fX`|{|WhXP6?0ee4<@OX#eJ!(rm~Zunxs~ccrmi`FEh(R9Ypz_ae6!H>$bPpco*@8vAa6@Unjkr0*7pFZ&{pzFQ!`{G=4n3em92k44d&qGIp6qYXkMK_PW6 z!(0%TWzN_rJ0N2;n-p`F|e#s39Q;8a+r)y1fj*iy*5pqx&|Om6-gF1A0Umj zah71aUq%oNdj42MaOQH{3lzm54@c|bGjfF3TPBCso#utH2e8b;5kZu+1^^$92+^bu zn4*UxVl<%#D*Sk4L7IF}GE^5M@jB~4hW(G{d(g7@@rXE0+Jqi{9249{$0{<(-x@_X zZX6xEAPcO1v6dBL>6~WudRAG>4rqjAJiF|b3$g~%Esc&j29|*ivVq$P9#iaa$}%Hi zsBev~xZZO+SvVB?Z;0n@Ez|Vv5!TjgvtUw&VRE5}(o)@Nv>OJE80Pw@k;7C*E!_o7 z5u+5nVOX&gvA0LwP@xE>Z;y~wHzBK9tCgRPTHb-Ix;ihbyh`t_Y`^t*sU@S~XQMW! z?6!(2ABnsVi4vGlbeK>ClScw36lwZMG!hh$Bwq}HR6_aRXN62$;XtoCT+ zJwLp)%HD@W5lD|l-iOpGFW2J{d3B-=B_L4@kR-Jpk2;)<&s!ip9$oH497P~K9+6vj z4b*9?R^Ao0{5z;~$tvnd5Ri562_j?QT~V9zYD%Rx5#;9rK`7BVMb|k+hVgTOAQWl( zbAccf>HOycLE41byCdS78U`gGQ4EkI#@`)vB;l=9zG_z}0_oimu}y~)1bJ^nY}-K+ zK_~`DBFK9qqTBv0klq^+-u6Wxy*DDheIo>EuU0+~wR{|c^sOQYM;f?m!&$Vei6i^w z0}8&T z%C%3^G)I%IGBcQw+~xq-(BpzKi@OC4S_bV}p`SWo(2eNAv@SAE(ZbEX30kB#mP9cS zV^m-kMkUUDA{&)B_lbx^LSLCtJrQAZJNhtDAC96v|56i0F+d_Ak)mJM?Z&wuj=1o_ zK@#VFI7*b3Ao5W9XcS%RUrLry43H$tJ{l1gcTFYBJ{s+CyhKxLd-XN(kU^s9lTmaV z*HCiYkPQp{lnswlCKkj+*+)$z*EU?g5uSV6G((>fQou=z4Ccwm`#alBNIe;K2NWcv zo{T`jA;?OkcR!15tMrkH~~Sq*FKgZD7Eur z8G>@JeJn$;41yoa5G;e>$AsXMU96h8TKO+g%kLKp2Tmq0|EK>EH#R-I86WulP05gq zS8OS%6Zw2}ODDCha<+!O<87)Y31`dh3|)R@#L0cSaW^{@haWgicW`;ub{q+&XA=sh z>r~R5*SRg!8cs&BDBzBHQcbW{l3}~714u(`Ntj@W`73IF$fgYD|9VP_1 znq~+H(8d`8nEjaxHc()%Crz;L6lEWu)%wrX6-=3N=P`a5j`t$CL~XMo$vreXYqga| z6M4x=oiJjrQZ`UIkwO@mHN0{%fGxp-_GhJ>rZ%0+)(J>v9&W<@^BCHzE~fL|7U%Tv zMxn_cn@^!;4r56<;u4cfTnuZjfdD-tLGnt&82>nQaMqrsT{4@C4L1uK5&CpUFVghO z%U(z7&Ec=9F$ckwffs|6dgG4822*RDnyZ1UpT%ku_4-1bm~~4_JRVrA@>%4dD>`6g z*HjOMUeg={$Pa$N{f*1Qt-F0jNN1*_n&N$JCNH^>KS5cZ 
z#V3Kc;AQ#~SD4qpMJCRZ4-3(`^pqQ=sR<$V$lKy>c9@35&#ZyAg_Fuwj{#hL!BImc z%R6rpEiv-j@Lejp<-?|uT0O}X?XI!K>5gz;M!4hl*pDSL?;J>+qTD|--m;{(J0Pu0mHfxYE9DD{%6IjEx$?Vgj5@F-{%yaxbkc8h*$C4pnlD zQ@Kr0*xe`ZmsH9l&M_hv(SM2Bwz}N(SX$#3qUh@0uNFv1V=vKQk1^#48M_Q#(uxIT zHzcv4Wc!m{jBOIndj);u0P!?mBSygbJY@TeF1wUBo3$fx5Ca#j1(&&FZXvo>stvYIeTCP1F^p;PFd&hmG5AtF&q52HpOVc2OAedG&4 z`{OM%`a;A#)V?zM$QL5KE#H*4C6;#lHLF&T&~d(#?_5 zh?gfF!cX+byHmB7BC}joBW3VFx^n_+8?Xd%pl3Su?mcWuW+_PFJ|~Z-ocH*lvr`}G zB7=7RdUJ2juf_z9v>VC7l5@Ad$rpngN0K0finaA{Y2 zf=aFZhXMl0SOE~ufTi;#y&sx~`I6rMVmZ|rXRljm-x$7UcfJ1ekRXVXQcO8P|^cLL3}4-0T)B=(X8G0*Ph zf)ZijZb)(m$@Ik)HV4p=9zlST-Byvfy=X>zwL7mTTcDRw$_3=@?3GQO5iz^sj}FoV zFb#QCTh~)a{KARv!zU)cgQlrgI%D;{_%6?O^52KkMi=HgK};uECK(eICRrxgGr>rR z%_Ms!>UPcqicIn|(RI!dK@lV2nP|^JzhonfglD2#UKZ~$4xsKgqG*WI8{>*~IN^(A zfXdJpC0fT7-C~zDN8Pjt65q&>kTv&>sC$2?Y=Cb(5Q-r2jp*jXmYWqvMa3?^5#2Hv z|G5(!fc~2(x}*31h{T8%AMlYri-b5%Jm6IM3$bFV=R;uBA$@5>pJJG-+ty{erSkmr z6l;Y0cuYq+Thm%&pRg~Dz|RUoogP23|E2Qq!PZS~Xta2xD#@&qvTkY^`p^k?fgAcq zI1lPr2IWd@AzhhTDT5h8h;d7?bE=$Ut{NAyEwaa?ov-fO85G(4D3-l|N{9J_7DMpb z6z5dqw7xmKgy+tkX-el%<}ic~b10wP-(+*BM3ldYx`VWkIs7-#9_MT{#}vhVFZWZ5 z7mwm?KM)@mBPloeJ5ltHINE<8Ihrn$AozR?P3)z6B=bjV5h*rvO`ZolEwlMMQR~+D zFkWbYYcu})sMOVWOA4G;{QWx4&&nWHjpm-uzX%_3F@1Eu z404no>^RnBFx-PC=T57`Kg+Y^Dojfqvx`+&y9u|+v@MB@Ec+|-7e%?#tRU0nD0tkh=ecP2@nE$%rI9zqkB zpD5Qxdew+TCHfWX5ycya%Th9WYLGZFt_t}qm@rraern90s!(~SdeJ>>^=PKvsw&Wxh_F+artLH+PpDD62*5Z4i?dTj}t!KXx9 zmWsa@!qjbDP=q7F(<4A**P$;GycvZla!R4drJ^&u>DmJbD-dui-6O7D3pF>8#0$l6 zc?%SEX6S`U${vLOhhz!{imHTCCHq3s%Ch31`v^`IqpfqKI2)XCEx2Cu6my?`l5<85 zi#d7QT+|-l5)hS#ld`1Jy*J6xl@m45>mNS6hl9phoYE{N3n&+v+*c*>hWwm?ez0_HlB9+d0Z0GjfcdlcNC+5 zgfbQ=hJKW>@Q$n>nWOJ0b_YqUjD>d;ce#jp9L4nWj^f@R)VUgq;!6(3-%^+&j>zkO zPcb^s`xU|zGQm(Cq)_G<@lNi8F&~UJ8#q9#gIruJCt0b>uo6nTHPqf+NBXM1zsH!> z&3ZSV6(*H=tXL)IONo6qB1g ze{?|4_Y|+ck*h*PPMvQk5{!6V;%;YlR;^rTtYX$X*Ekz)BBC?Jy|v@B1U&h@SJj+= zbqHtDh}9IZ+eM7gr>1^65~U~Xix7Ob8Zf2NnOT#yRG-NbM|lx=c4EApYDJ{V#i 
zi%ksoLt25uHXhE2e5A;|7`MAyIM}T1P|kWH-yXU(jn1adId)PsxYK9^~IF7k-%4e(gfHH=Np>m&raZz!S&;u z96oc?=EeKnG$TR%F=dPNHK|w*E-x(tAt!~x4s13T?k4Re5O6c}geGYbdVVaUr;g`* zteEsA?(YaLe}_PZtnm4&FIgO1C( z!5swBBDnu@hP%!K{c^?&e9wgO z4Uy&!6o;Z^06838x@;b5K;Nm$fuBtE4YJs*MdXZ_0k;xYV2G_&A(0M#3xj6>VenfR z{I6w$*9oRyEBaKAINHMCf2~Mv-Dln4s~l%2M*Y42%Rgc8*jIKnd=QW*dFy8CqEs*+ zt8IgMLRfmZGe|E^h$@6&Fo6(N2=P=#h$@75s#p!yQWZixRlI(G{F{ytTdSo{OCtQz z#Rw7R3m(u__v<!KSCZy+ra!YMSp~xBA9=s$Pw~46-b$sX7;;9vOr#+ zMu=VfMIXZa{?3s9kB`DMNF6x;-J(Bm9@~L)ite-=MeP3HEpp&od7d#!?e7)s` z?KJ$oV#g*Y&D!P1&lhQE#ZKHj9o5Po6kC3_#Es{Gk=pbJ#kNc0KQ1$g9dg8cwTP$g zGi3>4GXx~V-1dMz6!PqE3LXsA%AbtYe5Gb>>e$NdI;Q-jVT%3I8PPN+xVM+vcx=nl zbbJ3ufNokSaUQIkdx)a>FV~9w(~V0V%PMs)d9pJ&=gu*lVV&O4DFlq2Y3|9?$7%N2 zX_4tVc4IP4Iy!+oG-(wkU%`Vwn6+TWvpqwyr zBl%bI5D*J?o=UGf@|~1bUxi3sZq&Zr)y+tl+@c9MTYJ#n{7}3q$23C=jukm9SS~UI z^vn&}ygf%yS%k`{u&P-<8s`9-K=wr1uUVziE?k-tO1|7zX7dLf_;z>a8nZ@vhGBtg z)#slo>`Od=f0&)Mx&Kn_lS4klW!oF%#~N{Ta(kQ0I9WbtF<_jtw)BG$uIMt;u5&rE z=fyXsp6jw;%^)TU_tDy#fU^-0uy$Hx(SJ4Q*t}&Oe6`r^Qr##bAii28)lI^MB0Bb0 zi{3X%5gq%hMSP>rNb+~cLHG4ybfowHEF_0roXA-aUL~>n&0I1KJ|+90N79$&KJ4B@ z{*3=pN4OCLI@-0d(^%(PUv_-bd6bzH4K&TuFoz3-6YFy8qOO_^ay73A8EJ09 z>(S(j^xKK6$cKSL(k~{w7ThT5*u@l;sXUS#y7RP9M2gX-Mdw(3;W3$#4j1IJXYNM_SJ|$b7;=H=z;8ZQ@1kjp8qt zFECpnB&?c7twbm)n>@eXqDU<&SJ?OU_b7_`P1qlA07uMQ1g1kVN5IId>y6g%sX z24Fn4E{tY*GXS-_R~7BYmoM0E8pT2xIRay)+;NhxY8(tVOtSY{OTQ(9f6hP84KxGp zI9gD97-Q4ULDe2qiiX+7=Hdu@x|gBm@zLG~v2$)!=lynjQ@SzvY_@EJQUI1E5~P7{ zHhOc(`?osmXuz9G-7cY;qOB*cCcQ>V9P9ljO3@}RwonXQaU^|(jdNmn*2r4WEpuAj zwE2lrDhL$P%@SpZVjsB*ye9s0 zF*fl&ZbY|@`{j!a^!hsal_7yv_X4ngDq3X=N}64^aPTKBD8V$%Cpyo z$Uz|!1#PVYy;CFifXRP%X;YV_NT>!C-dpkkWGlM|Z7B%d#PVLPFkN_~8kFBx^0z%% znHD^7fnj@KH3$s;0VLA{qNkh@UQ*xQHsB6=ZvAx|eXWjEI1wyNOTp zpt_1g1D%DG8dK|7%LN-`6LK!92AgT8r?UWEwue4b5d2Zg+zqCU!K@53AC-L$hbZ>y ztN?!KNLy`biCqgh@2?Ugt3&wsG^+WvcP34cV9G54;VLN9JKfzHLS71Z84hrn4wG9H z%{fDzD(u5|j%d~+7<=8V(;djjn9mMm1ZmA%BYn3!#Yhq6dqT|jZ)Ih7Y9>EXif-=x 
ziLxzk_o2BkF~LnmoIq>z7!rNVPJBBG1&@wX2rO(#HSm`(?x86cRvi}$QTG=Y;MbrP{usU}Uy4}}o6h9EY z@8&-DW=_mh^w0xFa&9~>+a}mNMs;*8<(wm*r0>sJCXM{vSVOSTta1x|I!rAg@H~^d zD1v}2Q6`-&M*t!;<{QX5e4Z|$JnI(KW6a{N=k+%I7G2OP6F&&cyj*3+0Xc)>gsvDT zC<(cDeJ+E$&nW8WWTKA2txt4#*^7a00bDm%u_0n#4itl@0@R6XtRJ$9Sp`ya$8(un z(s>Bq(VYhECOgSR&22{Yij~e|*^9Len9I145_gqoaW#3p!R@ASS$iCTQvXXnPs-<5S zs2%2!&rOZvxsb}EwnBrt888XnU(W-h{M3fP+%)HbsblJ9z_iw%60rX+9jsFX`(lY4 z+CS?IIT~g@x<$k+c@uH?=Mf>BI&eI1OoY|!?2ksf#-dST#x$Q2 z5P`g915~&Fm_yK z-(Ro0`>}M)cvk0x;0T`!@9!&z7^yHpp-Rc=kq)V#TmxdshK4a8up?Onsb623R(usg z$fXzzM3-fP8LGgr&%L&*Sj>?MIW5aakSswmNsg?5KQSofuP_YV0I^@!x2M;yi}ylj zj90AY@ImC7@s!TO?cz$isLD1a`C!eB zF#F`K#t$z{90M-#9Y*6j5yk=GB};GsOJ&u%_c;r0!8_KWiw)@wadaRNp`$6c+gso> z``!QxLwReS;?k1VB)H|~CP>TGXc%b!b+Ene1jAmBhOYVvx4SrP$@#h|$vw{xq&U=+@C$^1nobx$ouQ@^#!qMStAyXDOP4w? zQfH)u-=|BxF3dp@)SfO~yVtJy?v&>D%nRtX0U)RXF`&vV`V_s_hPR;lOz_%J1l4DP z*G3sp-*}O{Hl6a?d?R>mD1iq>=e41T0{0DhZ7#S^32_v!eCkk*VZ9Q^L%{ z)+tx}v%x7w2}t=OkgO>G&!%eSTcwu2^Zp;@Jbg?4ANk8R$-VH`FCgUs2<3t3C{Gc} z|8<}|MSB11KzWK#{;vb&m7MkM7m4zlMEP$A%2NUlijMLWq1U&AzpSfT`TJ7KPnKQS zTFF^|U*a&|z0j*m^!mpVHzQiMhim%Wgrh$MQr?1dyn zB+ETZ$aiQW$%op$hhB9_ReUf9KNmm;y9zbw7A|@_ReTZ;6c$XE{c%hZDsF_-dwG` zz1;FHzqpjC_x5sIPyDxH-_2TGkBEK0wuXJ3mGq)Bv0I5Rc5v1fq>U)-`V5?jW#n#R zDKcA+1cs&vnI0*7hNj5GJ`xz3A`JaVU}%ak^dn+u@!)3h#N#g@4+0QWff!KLMLv%Q z9^|d{=XsDKs6HNea5JNN=ZnOHo5h3g3_M5)JSaLIqzDhbGw@({wes$A%P)E!RL14I z%j6~fR~=>T77spACgVMq8W7HRyr>N5J9)9W&vygs9blE6W!`d=nxx{7+7Je}kiQT(Hxe#bt}(Heu&eRE>)Z+-Iw0 zK)XQCDJ5r;mLrV(XT1Hk!PLUK%&lWxitytTVU-d%;%=oFW#3;fAp9)KiB7FO)u*4{`#6A_47)6%Y zr?kYLm2$8}q0-+hNB>*7m>12SX>`%Ses_v_zj~aX?nBrWg_`r1f z9_)N~p;GG7UK69?^U~v9UNK_G^_%6^OD!%-QLE;+%B9OKaBF2M@W6e`#2&B$f2+(* zg4Yn8&;u%bx?Jk%+h)!zyZS7}#f6NmmaFDIykrl6^Xc*>m&UhwIF$cWxwN%!SCWmz zUn++i&=NfDvI#c_+?uBFv>*usTDot(*aHJ)-${}nAXM|)<>+_H?yNXv7>Eaffh72T zyX>z!Ha zRx5v8ZuxKJLhpC^bb0FHO9QFevAGA`1$oV3>XPkKXMXy{HGd>cffL-!1W&05ll!(! 
z7o5N~j!RyFQ=h05a%T@7Jmmn}F)y6L*{v(5_~NNF5PfVyJB5yA>yZ1P8L8Qf9($6sR)dm}j z=f@kn_v~@rLi4)1#c`TLF~l7RH8c7Vqo=aUEQ8@J3tk!WI`#3#WiC|yoRTW4x^no7 z<>>!L;Qg9QtFZGHUSO=cY%L2M*?oF0^2lI(CXUk8n)Mlf!dy{+smAjDaNi2wkEwCH z1ea+w&!j2Ya>$!IgT$U(>pV!p30d{s#Eg-r?xHdYTSTj-Y84MCyT=~nQ{Z&YdWcBz zzJHbcOaC=(OOSZYKFSldB|fYDdOoWyWbD!FRvy&%zPaGM{+i^6a)XTBSGafkHhcP2 z)5}tb330zzj@@UVC`y+8X}Q#Ag38LQW*)fff$af{^iRv1oKUm}fcVq$wkxtGtm!{3 zb7!UUW>kX;e^xGComP-A<3R>fo{B%yVw8Ba7S@EbYUu-D{#luukQBtN3d=8*BX>zA zCE!tX5{DuZ=S$^|t>!wW2&^xaw>g(OMIe2tOvK#2CsY-3e_oDu>3c$~j(p^it0MC3 z>A8uU2b2K8a%CTL86iw5t}DJgf?CgcFSY`Ap(e3I;v3EIm3qYmEQm7XPb}#6Fvqtn zUZ258aD>=qm>-k-pcjTFLj8H!Cj)@HA@7e!QeAvF%hjIx z3TS+-%+5Z2?5`fLS{5P207=)S{6&V7y!wAp_6HWJ45YuvSb(};FGuclXf=`( zo??I`C;ZnloaBW6dU;FW7dhd-E(Uct!7k5~{XM!>=OfP`=>mvnGMvmuUaq=(bg2xa zXUg28`zHmqjjgzp%PoIZDfHgQL1w;n)AW~N=D%d^;$32y^ouHHxOp=-r$7gezStJ-``ra4v`cdrG+}FNg_34I_!a92PE%^g}bPLS2RV(m46)x)i zRyP~u@rJ*1F=c(p*2OtOmtyeM5P6dmbS`{I2#z#FKCaMMm@+r8Zh=cLx3Hr$WK1*` zEJ%n9&J)y%=&U5ySj?CeL`IOlC4m_|e0T`W(~eo_Qp5*2%1f}bG0BILLMmpOx9L^X zCaHI1HVKydsJI1&{Ox<>UZL1wW4m)cL=y>Or`hQt7P#)rk;v+AHsUyv%Xd!_GRTkD z?RXTa1YDZr?rRlmgeV>d;B4HM`|ftzjX}Lusp$N@*8R{w`rm6`D}%pguaab9(teQG znM^0i%g@3YrPu={>+P=9x{F9KOcH_R} z0RK&1V^zl)(m68G*AI+c&KgdvX}FJ5SF}|_Zh)-MC%ePwwK39Ua^KXtCNBw16cI{x zJBs`I^?lRp>faRVn+CCay3z2JXZFpmt9)}>*?2C`8~5j2ECW%>o^3%=Vz{J=`{i85 zCQEM#vnv6e8J~(mS`b-%s3Cw#(=}~Tc>M?9M=%XjCbyh6rF;sGsWa%N4DbGAH;Dnw zxe4+(m8ijOGEz0uRF6#a$<}PsseOXv77g03U9^{y3)Wx*H2y+6)y1nm5Y(5#YxYT{ zF)9j>BczCPeWX!Mmn5_{|pANauS9u}B4f~WOt)T>z)hovoV z#wAYlH$TE7D-)iplojC=S6|ohio?i8Wce6jJav63PU%VxU&|lO6EN67NIUQnlnHK~ zT`)GHop4=C8QcaU`z&m27LqcIGRE*rM3B_jxYQWon9DV>G*Y-eutRRd`C&F2y0!`k z~sR^9}MT8uxj7`bSMX;JDIrjlJc1AVlf1C>NOeTCdzq@nYdxPn-e zI_4{UE!jBGTv=kAeK1PN!_Fw#*8{h^a|m8JMh8LZLkV$8A4L?o_f)*zPZ5pfJ(X=P zHI^c(+IuS8$*&UzTV+MQw-SA;QZzl4GPOA=SB>A;iq%3{8qOyxrG356Rg&SL(Mw9` zgYlBnUUk2qj^-wv85XDO(~x0|+BVD}^#WVm9oWD{0F)t_3N@1^=Pjd?Y9=M6B;ZN22uWq(w-DNeV#kn^^IBD|_| zwiYF>Dqqu+@D{@|Y#KR#gwvw)n4^&0qHIc*;~Hh#;lCdEfz`l|sN*$FJvQ#Vpg?a_ 
zsoyk@b-3ST;Dg2!R7b08x})e5gn~PuZETT!i!!j9iQ+&P=g#1+Kh#&BJmZ*%8;Cf` z?X(B$IY0TD`II#>9x!fNt<&M8a-ZW#cKeH85XV8d<8N}(-KldyUfIPcF7c30$;}a= zYAi^vlHG5MkR{G4bglw>fIarfivRwRJz(K}va;1Vd+h-W{gV}(y(tX*pP#H;etp~> zw>hBt{`n^>SL}`dAo7bHAWvzr{|5d(cl1z`!rHK_1(Y%?dyPfN?dCzxI>^LG4v-wm z?(2RtfR(cgxWjY_xn5+7D;{=jFP6B;l}}h8T%ymMNEl{-aKx*rgTp>HZk|1b8~y1a zB(qLeI5ovjtwM?6JkaEMR<;6|%n+hSI5^gJAk!G5K;o`w?00T`X&!0nvMx$D{o$NH z&raHNtNfWpGxEhwaTreMr8-R+=q@)8M9t<--ixR5b0`1BQ~9}*2ji*y z+{t(SRDSLXn!BeocOUHZbH{^cE2TZXKiiq!Xg|9Y(86Uv3}56%I=emPM&65@2z%gAW|v&Hhh;OY|#0woo!$M-Q7NR z#^Nw;*`Qz2jFm21pIos6VXB6uYR7%J+RJ!KwkqzYJhqmctZg{0kJf}~tk0&%Aqmr^ zOyoP&zC^TG@n}9y>%J#)CPm2jLHX|5{AwIrTAa0=JFQ^s=E$W)Sb3R~-8!Zq#gY+_ z$gGZ+8+%N4_*C$)n)=j)L*hX~vOp_Lx z$bw;#wf~~j*o~x0Z?~C817EsdrigvMfPIZ`704WtQOOLO{_hn&XBY)K>BfRAA(sKW*IW{|dk^~I=NXp@balbk@+@xvmdAkQwEU`f-VK8H znwK}*QF?a#je4a@=yd=VGR<59HIJFq`5vg@>-I2wr3*p|HB;J5H8_P)Pk8Rs0}tS^ zUl=K$NcN(o&JZdH_PH3|FC=Xmd`pzeb&w!|MLZ}(grJA^tQlvkXsAvUO%R*C-!Xky z4T6K|;WR*-5?K_QV=G)3y=>Vg2VC`ZsD4gt6b$wyZdUdthfpK#4_K#kS5JJX1nm8= zablU!NZH%$6RL#VaIb9iC!}8q%?m*WrU#CA!Hf?LQbZlUx|blz{i1%@{2NYkdVw6^>=W5`WY>LEd?dqaupK>H*>+93gUJp4Yy~&CA{nYd`J0u}-n4>T;XKG- z${GI6JWRR6zggLKZPvtK?B0{Y1jKWd(t)&sQr&rw!IWG4xjalc#-FQf+n+T7=5v)B zZ_+(3QE6MX@^_V%pKL8CBfx2;6D~%`MmX<|$jr_y!f(sxbY5Zgu_BXIGWyrJrzJ?y z_}Q|KhU>@E{WbVKWymy}4L*>Y#=PvvnB;WY3o_JpXM>jo%e_0vX_z>ygfTZ)h7a9rEN$2AeP-W*=^sga2eozI;%2}UZe_3*cODcB~#zL9#Tp1Bxuh_ z(W4&bFkK^Qy{2cnt4QK2X$|J^01cf4pl4=|uG1p+=yxj}+rwMX{BGqE_c1goGiTqe z@G&&mquXSU{zD~tTdT82DO39&D)F}Xe=5Lxlr^r0T2WDdIB$+>J4?+-w>7Mpw^f)r z5oLCgox(tPaR$}4WdH?jbhY->f86^+`knvCxXut_xM1WM)gHj854C#T+#WE9hg!XE zZVwprL#!bCRw(JVEFHrOMjAyy%b3tju{q==#ppRhUQ+B$d7>+n_oi0= zL0FAOQtM5vL_EI`ScwO3Z7p5a`&rV=R-=#Y@w|~Y-0dz2mV4^6%_DClu&e_!YCK7@ zNT=k&B1)0SAV)uDs1g<*u#al%;}E;~%9y>fs|A7yX>0c7268`AOdpcUQAjeG(Pb2p zOnz&gLXydEZS^0t6@?U}d}}M$1N{1?_y*@^txugnCtPlD3;7LXFxBio?!F@b4Y#Hf z0o69W?w_8Y>a|1N^L6*3B{jr7=06qh>h@msJ%GC^2T#N$c~h!a^sl`P-5dxbE=i*2 z+5V)syX(LXydd;`xVBuoYh~gDzo+;)4uDYubC6Ghkiek(BsfBNhD(4h2KYPuNnv-} 
zXy;t>lxxH#Ne7R|U6z8vXChr>=weX5DJboSN)P>Q6pgNFd+`2v>uPA+bhL8zVj!Or z$gTt1+=vHtZE}i^@2I*O><~~7-W7K;k>sT;o0sZhz5LbtN-5@r(GK5K1U>kJ@s>QK zGaLY(zZkf`HgIi*Utx4}4%{JHADmqS^7+u=ZwzGDf$feEnsW_w$f@`n^rW}P{sgyX zwbasbSs`wV3!Nlcv{dygsurV`Epa;^jcKVCOD%itXQ5gww_MLpC5yGG(QCMAo(gg4 z0Ec`=`DTkws~${FU?q8b!uul3FrDR~58{}sJ~%n8U!FlF4z=jfdCmXp116m2jf<`1 zU=`b1uXevl1BqF~_PoOHP1m~LU>0Au*ZnT?d;d-GFQfEu3x07+X(yj%9@EWIx-x71 z-Bf-RB&-6jtSc`Yzai-8xTz*o{(T$5y}31WukhB{?E}YRPv<6rhvQ5K<)8t8jy9QU z?&2ufX;TA@L11c^%uJo;r6vMPV-x+unAwIHLz{c9@w8v+elyN3y;sI}F-)p&D;@8B zxxTNsgmcPW*qg2cKL<`>b~gSvE;f8hgvtnQ*mzyfq=%$EvxflIKd0wyd^cFwv2zr+ z^&fD*+xUI+-6%7a)~M~-ewbm~Eyu(yd~%?;y>#4a6?0Je9xcz^7!bK!oO->hqxv3d z5vbce)S`pB9jLbg6%Obr-Q9aVL$$-m=7h|TO!wCKGuWolc#@WKD4;q6gkBj8T63|7 z53$&9Q0Uq32FCHwV$aRTY+xmaLeDM3HZWT6DBW!>$~mZHm+;(86saY}9qqlY4p4V^ zsGRI9?(k5{4(bjM6|(etsFfU4PGj=C#~_hrQtai}uW%*Dq}c1BR)E^;p;jE!-dlzz zD)7~Eo%92>-N4A?n6*v_j~V7 z6Vh$kUig2@x-I5)8*ece1^n*>_(GdZ*4;)M&EdG!i}&h9yLt1j6#s(ZADXgX>!*~y zi$i(KMo`rB;2kIte82hPAbw&k2y1t6Fs`_-7hW8~m;bQ1W-Pwx!B==q=AzX2ih;P` zjwZ4@Dmj(rSllT|ea7D$d~p=NYVgA*+@Z!t@Pcxs8H&xO8kf{ApV1}p3-$Sn1AE$! 
zSE^0RXs5Lvbd8UFp#6VnlZ_-bD=jaJPuqUm&VJkXLT)toBTW(4n@h93_a>+GjZ9Oj zS1U_c!dwYq!OHaZ=Nq(b%ZfoKug|Bui1!zc6u_fOk`;%Z*TQGHo|hV z7r_{{#m#6l!b^&cQDyr4-X_LtO>@Lk(C)zqgC*;HP z+;5kvU` z*>Hy%YnR(K?7IfGf#z`3Cc8Da$!_ha+BmeyZcVOnzrk_qwXe!=vRkjyM`+xh3yxb~ z{>rfDZY$kmjl?~H5ZF5DPOd$7o9_e*r?|~`qCIz;@5Jx9+k7V~s+I4AEqJ@{M7wI} zM7!#CKN9V#+c{$#cGc~^6YZ+oeJ6fb-R?V4QLTI@0K2sGY6GB6Hguv*_EO)8HrY$b zdQ~SRZx%029B*lpz4W?!j6Qyoy>yp8|5IsIQLXNHm5~en=<%Is&kdbu&+Wd?s!-wEGo zD(<*WUt+4XwgQ#qx`R%nUGb*3G-xe|eWg(OJQL4(3{ChpQ?a+hsSbRYso3kG@>Ql{ z?{$X^YMX=FyK5j-7k(f2=|<`T*Uik-4|(%;%eorR!;1aBm4M#wUvKHVHI5JK>bnRRI zHo9uHVcCDz+|2C!`N77*v~z6K<{cE@pWx>E>y690eQCbOZHlG0Ym;z~syD?YPVp`G z$iVD4a7F(*!uDsw8MUg_m&TiLt*|>OUx!KQDI20&KlE5%;eC(wwVrhsGXLUZebH#A zY7Y8}$NpQzZAHm?d#}*aE5?$&S2QC}Iop_iFSDnxSh)qqI*w#Kj;$?mK`f?2SMXUC zhii%BOEhhZ4v;@xRsl|Ii&{g8if!n|{vF)d#-*FS1V6TI)^&8PYS;Fie#+f9dZ09H zkE*_^jG^|ZEtK@GeN!j_w4EE>UJZ`8a<;T2h2E2U;}Cw4!zg%Oa#z1AI^E>538FeN zb+a&_-OHE8balda*_Ks_@N{uYTUXr6n7Q+=TC7IneVcTB61-!_L)5~%b|NKoB68sB1lFq-YrCD-;|r=tga7d3MQD)#^@HBpWS zR}2{np?A@0yPp8NPOrG#PXHx)-|i=X61`s%$>)qYL`f5HN%89VI_n*$=8mZMrM*3J zUUGdhsm8|4y7i3YY>uzFF=!{-k&YIBBO=XBaY4?;;tr0n`I-_y(QUrJsO>rW>n+|I z@3a1>dU2jG>_&Zd8- z-RjB|TkTg@wqFw7(e6t8eh!NEtew#kUf(uBX~TKLgW`4R1Dr!V*nM~U0ACCbPP{IN G)c+T*Clg5k literal 0 HcmV?d00001 diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta.proto new file mode 100644 index 00000000000000..d3ffed747206c3 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta.proto @@ -0,0 +1,38 @@ +syntax = "proto3"; + +package meta; + +/* + This is assigned to metadata fields. It describes how the metadata field should be represented + in DataHub. This enum must be used in the `meta` package. Multiple can be used for the same + metadata annotation. 
This allows a single piece of information to be captured in DataHub + as a property, tag and/or term. + + Tags can be strings, enums, or booleans + Terms can be strings or enums + Properties should be strings + +*/ +enum DataHubMetadataType { + PROPERTY = 0; // Datahub Custom Property + TAG = 1; // Datahub Tag + TERM = 2; // Datahub Term +} + +/* + Example below: The following is not required for annotation processing. This is an example + of creating an annotation using an enum. + */ + +enum MetaEnumExample { + UNKNOWN = 0; + ENTITY = 1; + EVENT = 2; +} + +// Assuming Glossary Term defined from bootstrap example +enum Classification { + HighlyConfidential = 0; + Confidential = 1; + Sensitive = 2; +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_field_options.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_field_options.proto new file mode 100644 index 00000000000000..22ee5925164587 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_field_options.proto @@ -0,0 +1,28 @@ +syntax = "proto3"; + +package meta.fld; + +import "google/protobuf/descriptor.proto"; +import "extended_protobuf/meta/meta.proto"; + + +extend google.protobuf.FieldOptions { + // Required: Mark option field with how to export to DataHub in one or more places. + repeated meta.DataHubMetadataType type = 6000; + + /* + Examples below: The following is not required for annotation processing. + */ + + // Set true if the field is a primary key. This works for any boolean with `primary_key` in it. + bool is_primary_key = 6010; + + // Extract classification field option as a Term, either works + string classification = 6001 [(meta.fld.type) = TERM]; + meta.Classification classification_enum = 6002 [(meta.fld.type) = TERM]; + + // Expose this option as a tag on the field. 
+ string product_type = 70004 [(meta.fld.type) = TAG]; + bool product_type_bool = 70005 [(meta.fld.type) = TAG]; + meta.MetaEnumExample product_type_enum = 70006 [(meta.fld.type) = TAG]; +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_message_options.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_message_options.proto new file mode 100644 index 00000000000000..572a24cb203324 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/extended_protobuf/meta/meta_message_options.proto @@ -0,0 +1,28 @@ +syntax = "proto3"; + +package meta.msg; + +import "google/protobuf/descriptor.proto"; +import "extended_protobuf/meta/meta.proto"; +import "extended_protobuf/meta/meta_field_options.proto"; + + +extend google.protobuf.MessageOptions { + /* + Examples below: The following is not required for annotation processing. + */ + + // Place the classification term at the Message/Dataset level, either string or enum is supported + string classification = 4000 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + meta.Classification classification_enum = 4001 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY]; + + // Attach these Message/Dataset options as a tag and property. 
+ string product = 5001 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string project = 5002 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + string team = 5003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + + string domain = 60003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY]; + bool bool_feature = 60005 [(meta.fld.type) = TAG]; + string alert_channel = 60007 [(meta.fld.type) = PROPERTY]; +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.proto new file mode 100644 index 00000000000000..2ebed30b3473fa --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.proto @@ -0,0 +1,46 @@ +syntax = "proto3"; +package protobuf; + +/* + Multiline message description + https://some/link + + #test-slack + @myOrg/teama + + Kafka topic: `platform.topic` + + References: + https://www.google.com/search?q=protobuf+messages + https://github.com/apache/kafka +*/ +message MessageA { + // Leading single line comment + bytes sequence_id = 1; + + // Leading multiline comment + // Second line of leading multiline comment + uint32 position = 2; + + // Detached comment + + uint32 total = 3; + + repeated uint64 repeated_num = 4; // Test repeated and trailing comment + repeated string repeated_str = 5; + + // Enum example + enum Corpus { + UNIVERSAL = 0; + WEB = 1; + IMAGES = 2; + LOCAL = 3; + NEWS = 4; + PRODUCTS = 5; + VIDEO = 6; + } + + Corpus corpus_field = 6; // enum field see + + map map_field = 7; // https://developers.google.com/protocol-buffers/docs/proto3#maps +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.protoc 
b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageA.protoc new file mode 100644 index 0000000000000000000000000000000000000000..6ccad0f42772f323adad0c2bfdc80973282372a2 GIT binary patch literal 1781 zcmZux+j8PY5Jl1;L~~hrc9&&UCLX}Ia%`wLFDWNmiDQ$L6JJuWm#VyYWiepN64FXy z9sfaoDW8xZ$cN+$(lcm;Hwh0EXZrLx-91CY-!*s<#bFxWEGGScCy6`d$31yMEfwhe zVSsg!1?dJ<68>Sq=N`YFjhht*$wvBQ!VsPByCt((ti+a`U=W{8)c~_RIePe|4F@YBgr4k)A(U%VTy##*HHT; zL~Stc_=6lzc_@C0-aoqjf@ecnf+PJat@A6G%aUjf%5N^u-VTPN<8!+}OJKYkd~a(M zz&yJ+{&6s}by|kv`PB)=4Qc_o9K0LZgjT`&Wq5V^=H&IrE(*%qv(v$qZT|E-rczQU z9B5#HR;5OvK*cYe8d_lu{?b4v1xjorIDcwzvGCKGKbv!wy#_Ysi5Jfzah-6RrcrX# z?-4kC$`I=94pgKSrnU5L79tWU5rQJET!f%VD;FUs z(n67SAtD|v({~N0&IY(M82`-;BA;hE^6FWs1~o7RS6qIZP^d#`2~oIBYAcL`Y};!& z9m5?`&BL0+r4>fHXhx|c?2;#WSd6(#V1-4zR?q3!$0n)gVKJ&rQeI)CZ&p{e4zkHZo@uo%cK48*`cW52JKU A(EtDd literal 0 HcmV?d00001 diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.proto new file mode 100644 index 00000000000000..0b9f6d0685de90 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; +package protobuf; + +import "google/protobuf/wrappers.proto"; +import "protobuf/messageA.proto"; + +/** + * This contains nested types. 
+ * + * Owned by TeamB + */ +message MessageB { + google.protobuf.Int64Value id = 1; // wrapped int64 + string value = 2; // message value + protobuf.MessageA nested = 3; // nested message a + google.protobuf.BoolValue hot = 6; // Indicator +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageB.protoc new file mode 100644 index 0000000000000000000000000000000000000000..d6305e5234c1e5122cf42bdf3077d33216ac6525 GIT binary patch literal 7030 zcmbtZOLH5?5nk*rNpJ{~;L4V1SdS4XSu{<7GUf6^w#!)nOLAoppcbGQItOtH3`s-) z7Q!yXAlF=S$Un#}msF+l2Xf3MRXOFBOa4VpIp*ux2MCF@lnM{h&P-4D*N^G$8L)q> zuv@2|ciIsf{ec&F$HSA2^MTv%i-BK|Gsa@L9*(e{Utx1)Z+P4h&)m*X81J#ESM;%* z;;Ge?6Atou#J-gz(aTmSe!snj1-ZF{fyC-v60^lm{igZByt zqSN_-=o_^4$=`0XX+vLCew||f%Cc0(&{h>=^RF2$H#EHtFAh&m5DokR4_V#!dCLu4 z-tGlr&^!Yq!B4zF*9{midHvC#eR>w~&4&-aiZ*b&*DP>ub~;?H`FtRJF?c0f1;)>U zpzlB4*l3AYqT|W1i`0c}LOlWE0gw}j4aT@5T5TU5I35OVugBe9iw}JPINbM!gQk!( z$L*dw7)f0IIzMj*XWSde|Ggn_cD+{nq}_Ch!#Z~df>$Tdn$)`==BZ9|(M1O0q0rRA$$p-B2VP(J(DPqueJO0?z(m_~c{Gx9YrK$oB#7<(|bGw2J69!`# zTRrfO*QDLsflnIs!Z7g=y_YoikghgY=uJd|rt8rjD7RUFVQ+sBY ztuikjV$tHI+TNjKZ|^ktPOVb5oH{qFWz1F^j$Pbu)SNowd9#kKdAVX%54rX8J;$oo zdClSW?q0)fuED*I)-y1mZ9fmdq{#w+%&-2h#qwl2lKxsTVj_^#!Y zcEH9g+7-KTC6*{jtpPMVeNdZ}XCyH>dX`H;!2 zXI8br>pNzpGN~xzwS%hV5c-R1@}dQlX0c+CD2P4;Q^zVbNTKmp3C07!$~xoqJ*#Bn z3-+*J2GcoQkC@i2r~9D7A}^b}=C)Pmt8cRk98=nNtX)D4Q`Gm1^@iQpZ&-Z0Rx3+m z*DdFnU9#%m@Jfx$zs2|K7J!t^hABmZ2h4?aC{Wz5+tP4$wP88V{$9hbRX>534`3On zZenj)TDVpvZ7Ei)nsZ2O$qv#3>-=EH!aN*Z4XtI8LFx#OQsW{S1R_@8=&_!>YHe5S zZL36oCT46p_FM7t!~H(1nrVPi}8s27B!qM15FBCT5D{hp zN;cp+Z2MSnIp|0Tb#JVPQYJL4N;Q zp?|47PUU_Phb>$tLsW*~QA@VGN0Ep5lR8c=yeEATRXK{aqYJI^5l88eguNjm7)2U? znyvuzm+5PaGhJ6y#n8T1ujQ_h(@-_3uTEkq!={L0ib&Ed8e#X>vkUP^!)VE10t%!I z{d=XD%HdFl9pt0}C)AF=9sjs%zXE;=0KQKf>^9)12)?NrIYYviA|}v-0O(D1M&iW? 
zo6YomJOX+%yCl)8hQ6&lxe`6P%`ZNGJNofeVT_)dMfA;~@l^oWj?k-w-cHa<5fkV^ z0Q7c*9wThFBlH*ny$$pfJer}`6z9s|8NMU@3s;>h@N0lji-JcZ{7;kM35853017DZ zGzuP!u={BgJQzWNr%~|ehJK*@_*3S8wE0fvk1kka_!F^@Ak_{c^Xmlv@S^!2ZBC#E z0ni^t=En$|4V0T<@?5I?wsohrXf<(ikD-D6xz-){Zn>N+<5Q7{;aBU!ADPSA1-;zCJi z8c2|r*RYP8j9d2BJl-u6r;9zl)vVpbl58AJD!!Igdp zPvYAZxp2WAt1@wnX8^&A{4hiCUnYJC=_V8a1r&H0`2i#Bei``zBPj4PItFP&KgV78 zsvLSum-@HkAK(42fS(3{^XM3)3H~TK22#WXdJq8pC^`lhVe=?D1{eYTD7$n#EgPUm z?BDb3gSZ#kh$@&FJ>{h1h5TPMmfj7^n`zu;bG~>vq@Ln=yCvUoG7gKU?3R&Z>AvU7 zi|?|ESDtw%8PoGm;011HS(6Llz_`OQaclD&31C_0+N$o%#dFnRcY*{yTb=^R?3e^G zV0_CmO>fX2`p;#zu{;eC3!5J%Pf>+fg+fV^IddWT7IkRGcWf4&O|*3e+~+p=C0lph zzNEL%MoD#i@c4y*st`z0{Qlp-oA`-ef z*0R%2ev7S!t|$*u)Xd<;Mo|p-uPZh{^@6b##CY()mo4bex}zH2n*!l>UB-TJPhPm_MEmXLV}8`{xB)%< z6y(GaV~)TZ4MmavIN^JKejdMY(SwdJT)Y{5|MI&y5@XgcY7fYudHnRq;bwqQX0Fq3I2?4pe53}FO52J%PL zQ{fC5*FRv6h&CMq!HI{V!@avuM2vZ2?e2L?p#S3Nl4)YUr%~CYN4*rF2!q* ze*L}oWkaQ?@M2EAwrTJ(o{FG->vcs`u*xN@sGrFt?Vt))Ii#}Bq#_vHP*?JIc>@j| zUp45(EEu>H6ZE1rVI@tcJ+aL+h`KQs@dnP>BCnrJC&-q8)V zz-GcB7~j$7$72}s4%&m59Lsq_(MWfJbN;K%w>vP@0L#}{;&0{O?N zz%lk|L%V+g43+x(7r;z|{64_kl%50cH8q#d;zr6ZI0$KXzHgj*tOL{(s$e#RO3u{C6^4fqauZDA@DXVn* zr6|>L`~5a77;{QCm3z2G&j+}-((5Uns(n0`5Yp(G2Ctg6^B#uBBV-k~TO6n5IizKI zj_G7(5DANT7NuX3T6jvp5U|8<8g|5od13ex+t6gCCeeK3JMpuYpfoxa1p4 z%!g9yd=i;-p68Q1m;#@9+AFU)^Xy?D(maA?bHW%E_tRJ(GU@RVPN zk8BxH`!#=So8}VHyLM%h09giPQLW{vY|I3M(pJTKpGb!R!#IVZ3_Z>rf#bfSFpLw7+Aw+u4ZQ?}X;9wi2I=s)uL#rTbQ83{diM`{ C=~(*! 
literal 0 HcmV?d00001 diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.proto new file mode 100644 index 00000000000000..532ea18ba362ce --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.proto @@ -0,0 +1,22 @@ +syntax = "proto3"; +package protobuf; + +/* + Test for one of nested array +*/ +message MessageC1 { + MessageC2 messageList = 1; +} + +message MessageC2 { + repeated MessageC3 list = 1; +} + +message MessageC3 { + // one of field comment + oneof one_of_field { + string one_of_string = 1; // one of string comment + int32 one_of_int = 2; // one of int comment + } + string normal = 4; +} diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageC2.protoc new file mode 100644 index 0000000000000000000000000000000000000000..d2988d2882a26915558aa6c8770b0812023b37fe GIT binary patch literal 815 zcmZvb(QeZ)6o%~+H%pE~Sx&Px>$)n7CNXg_L<=u4ZUBLh3ingTlBrUgpd?)E4fZfR zB*#vyp^0AQ_xbaM>1UB1~DZM(TIJ}j^75(RFOUt{=n7`~td{I}$OvDUo&t^95mX@NMnwpEt`*Bh}Vu$E6%N5|+_b+g~>QpK}yJ*ew3MnkUL7Q4I8 zj|zQKLL5m9P)6hlLFNm{lOo2~2uhIhMIdJj{86-BR@P0%h^#KNigj_9ZJK8Dfc*26 zkmUJ6j2e{^Ajms_IYu}#!Z%7g#m0kZ>XgT7cFG7)k`#(aTAXAT4mfFW8WuLfLK%fl zFxi=VulUq{il~rYo>(%@`k1TchHr?wk_;BN8h9M4S$Q;5__o;V&q?Q@u97^!fG roBJW`kIiw0(KNM}55O;&fExkF5(tKv*(eELI>oVsCbz!*C#?Pg(`ATQ literal 0 HcmV?d00001 diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.proto b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.proto new file mode 100644 index 00000000000000..a8cb2fe9e80f69 --- /dev/null +++ b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; +package protobuf; + +/* + 
Test for empty +*/ diff --git a/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.protoc b/metadata-integration/java/datahub-protobuf/src/test/resources/protobuf/messageEmpty.protoc new file mode 100644 index 0000000000000000000000000000000000000000..6de6ec602a2b8059c5665961ac7cbbe3867f13a9 GIT binary patch literal 79 zcmd<`<&rKa$}h=JDoxYRO)V}?Oiy*qEhwqf1B(iAz$Cq7xY&eP7#J7@xj48Od4!l5 T7=%D9CLv}<2EimYusUM^lJ64% literal 0 HcmV?d00001 diff --git a/settings.gradle b/settings.gradle index 3ec3a2856239fd..911c2774e9f61c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -42,4 +42,5 @@ include 'metadata-models-custom' include 'entity-registry:custom-test-model' include 'metadata-integration:java:spark-lineage' include 'metadata-integration:java:datahub-client' +include 'metadata-integration:java:datahub-protobuf' include 'ingestion-scheduler'