Skip to content

Commit

Permalink
feat(protobuf): disable binary protoc custom properties (#10778)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker committed Jun 25, 2024
1 parent e1f85d7 commit 0627853
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 5 deletions.
3 changes: 3 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

### Breaking Changes

- Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this
behavior is required.

### Potential Downtime

### Deprecations
Expand Down
2 changes: 2 additions & 0 deletions metadata-integration/java/datahub-protobuf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,8 @@ usage: Proto2DataHub
--platform <arg> [Optional] The data platform to produce
schemas for. e.g. kafka, snowflake, etc.
(defaults to kafka)
-protocProp [Optional] Store the protoc as a custom
property. (defaults to false)
--slack_id <arg> [Optional] The Slack team id if your protobuf
files contain comments with references to
channel names. We will translate comments like
Expand Down
4 changes: 2 additions & 2 deletions metadata-integration/java/datahub-protobuf/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dependencies {
implementation externalDependency.jgrapht
implementation externalDependency.gson
implementation externalDependency.commonsCli
implementation externalDependency.httpAsyncClient
implementation externalDependency.httpClient
implementation externalDependency.slf4jApi
implementation externalDependency.jacksonCore
compileOnly externalDependency.lombok
Expand Down Expand Up @@ -88,7 +88,6 @@ shadowJar {
it.name
}
dependencies {

exclude(dependency {
exclude_modules.contains(it.name)
})
Expand All @@ -115,6 +114,7 @@ shadowJar {
relocate 'org.jgrapht', 'datahub.shaded.org.jgrapht'
relocate 'org.jheaps', 'datahub.shaded.org.jheaps'
relocate 'ch.randelshofer', 'datahub.shaded.ch.randelshofer'
relocate 'org.apache.hc', 'datahub.shaded.org.apache.hc'

finalizedBy checkShadowJar
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ public class Proto2DataHub {
+ "(Default is schema)")
.build();

private static final Option OPTION_PROTOC_CUSTOM_PROPERTY =
Option.builder()
.option("protocProp")
.hasArg(false)
.desc("[Optional] Store the protoc as a custom property. (defaults to false)")
.build();

enum TransportOptions {
REST,
KAFKA,
Expand All @@ -172,6 +179,7 @@ static class AppConfig {
private final String slackId;
private final String dataPlatform;
private final String protoc;
private final boolean enableProtocCustomProperty;
private final String inputFile;
private final String messageName;
private final String inputDir;
Expand Down Expand Up @@ -207,6 +215,7 @@ static class AppConfig {
subType = cli.getOptionValue(OPTION_SUBTYPE, "schema").toLowerCase(Locale.ROOT);
inputDir = cli.getOptionValue(OPTION_DIR, null);
excludePatterns = cli.getOptionValues(OPTION_EXCLUDE_PATTERN);
enableProtocCustomProperty = cli.hasOption(OPTION_PROTOC_CUSTOM_PROPERTY);
}

private AppConfig validate() throws Exception {
Expand Down Expand Up @@ -269,7 +278,8 @@ public static void main(String[] args) throws Exception {
.addOption(OPTION_TRANSPORT)
.addOption(OPTION_FILENAME)
.addOption(OPTION_SUBTYPE)
.addOption(OPTION_HELP);
.addOption(OPTION_HELP)
.addOption(OPTION_PROTOC_CUSTOM_PROPERTY);

Options firstPassOptions = new Options().addOption(OPTION_HELP);

Expand Down Expand Up @@ -357,6 +367,7 @@ public static void main(String[] args) throws Exception {
ProtobufDataset.builder()
.setDataPlatformUrn(new DataPlatformUrn(config.dataPlatform))
.setProtocIn(new FileInputStream(config.protoc))
.setEnableProtocCustomProperty(config.enableProtocCustomProperty)
.setFilename(filePath.toString())
.setSchema(textSchema)
.setAuditStamp(auditStamp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public static class Builder {
private String githubOrganization;
private String slackTeamId;
private String subType;
private boolean enableProtocCustomProperty;

public Builder setGithubOrganization(@Nullable String githubOrganization) {
this.githubOrganization = githubOrganization;
Expand Down Expand Up @@ -119,6 +120,11 @@ public Builder setSubType(@Nullable String subType) {
return this;
}

public Builder setEnableProtocCustomProperty(boolean enableProtocCustomProperty) {
this.enableProtocCustomProperty = enableProtocCustomProperty;
return this;
}

public ProtobufDataset build() throws IOException {
FileDescriptorSet fileSet = FileDescriptorSet.parseFrom(protocBytes);

Expand All @@ -135,6 +141,7 @@ public ProtobufDataset build() throws IOException {
.setDatasetVisitor(
DatasetVisitor.builder()
.protocBase64(Base64.getEncoder().encodeToString(protocBytes))
.enableProtocCustomProperty(enableProtocCustomProperty)
.datasetPropertyVisitors(
List.of(new KafkaTopicPropertyVisitor(), new PropertyVisitor()))
.institutionalMemoryMetadataVisitors(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ public class DatasetVisitor
@Builder.Default
private final ProtobufModelVisitor<Deprecation> deprecationVisitor = new DeprecationVisitor();

@Builder.Default private final boolean enableProtocCustomProperty = false;

@Override
public Stream<MetadataChangeProposalWrapper<? extends RecordTemplate>> visitGraph(
VisitContext context) {
Expand All @@ -92,7 +94,9 @@ public Stream<MetadataChangeProposalWrapper<? extends RecordTemplate>> visitGrap
.setCustomProperties(
new StringMap(
Stream.concat(
Stream.of(Map.entry("protoc", protocBase64)),
enableProtocCustomProperty
? Stream.of(Map.entry("protoc", protocBase64))
: Stream.empty(),
g.accept(context, datasetPropertyVisitors)
.flatMap(
props ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static datahub.protobuf.TestFixtures.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;

import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.data.template.RecordTemplate;
Expand All @@ -21,7 +22,8 @@ public class DatasetVisitorTest {
@Test
public void protocBase64Test() throws URISyntaxException, IOException {
String expected = "23454345452345233455";
DatasetVisitor test = DatasetVisitor.builder().protocBase64(expected).build();
DatasetVisitor test =
DatasetVisitor.builder().protocBase64(expected).enableProtocCustomProperty(true).build();

List<MetadataChangeProposalWrapper<? extends RecordTemplate>> changes =
test.visitGraph(
Expand All @@ -35,6 +37,23 @@ public void protocBase64Test() throws URISyntaxException, IOException {
.collect(Collectors.toList());

assertEquals(expected, extractCustomProperty(changes.get(0), "protoc"));

DatasetVisitor testDisabled =
DatasetVisitor.builder().protocBase64(expected).enableProtocCustomProperty(false).build();

List<MetadataChangeProposalWrapper<? extends RecordTemplate>> changesDisabled =
testDisabled
.visitGraph(
VisitContext.builder()
.auditStamp(TEST_AUDIT_STAMP)
.datasetUrn(
DatasetUrn.createFromString(
"urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageA,TEST)"))
.graph(getTestProtobufGraph("protobuf", "messageA"))
.build())
.collect(Collectors.toList());

assertNull(extractCustomProperty(changesDisabled.get(0), "protoc"));
}

@Test
Expand Down

0 comments on commit 0627853

Please sign in to comment.