Skip to content

Commit

Permalink
PARQUET-1126: Write unencrypted Parquet files without Hadoop (#1376)
Browse files Browse the repository at this point in the history
* Internal changes to allow writing unencrypted Parquet without needing Hadoop in the classpath. Relates to PARQUET-1126.
* Avoid loading encryption factory twice.
  • Loading branch information
dlvenable authored and Fokko committed Aug 15, 2024
1 parent 0f3a615 commit af4307b
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.conf.HadoopParquetConfiguration;
import org.apache.parquet.conf.ParquetConfiguration;
import org.apache.parquet.hadoop.BadConfigurationException;
import org.apache.parquet.hadoop.api.WriteSupport.WriteContext;
import org.apache.parquet.hadoop.util.ConfigurationUtil;
Expand Down Expand Up @@ -53,12 +55,25 @@ public interface EncryptionPropertiesFactory {
* Load the EncryptionPropertiesFactory implementation whose class name is given by the
* CRYPTO_FACTORY_CLASS_PROPERTY_NAME property in the configuration
*
* @param conf Configuration where user specifies the class path
* @param conf Hadoop Configuration where user specifies the class path
* @return an EncryptionPropertiesFactory instance if the user specified a factory class and instantiating
* it succeeds. Null if the user doesn't specify a factory class (no encryption then).
* @throws BadConfigurationException if the instantiation of the configured class fails
*/
static EncryptionPropertiesFactory loadFactory(Configuration conf) {
  // Adapt the Hadoop Configuration to a ParquetConfiguration, then defer to the
  // ParquetConfiguration overload, which performs the actual class lookup.
  final ParquetConfiguration wrappedConf = new HadoopParquetConfiguration(conf);
  return loadFactory(wrappedConf);
}

/**
* Load the EncryptionPropertiesFactory implementation whose class name is given by the
* CRYPTO_FACTORY_CLASS_PROPERTY_NAME property in the configuration
*
* @param conf ParquetConfiguration where user specifies the class path
* @return an EncryptionPropertiesFactory instance if the user specified a factory class and instantiating
* it succeeds. Null if the user doesn't specify a factory class (no encryption then).
* @throws BadConfigurationException if the instantiation of the configured class fails
*/
static EncryptionPropertiesFactory loadFactory(ParquetConfiguration conf) {
final Class<?> encryptionPropertiesFactoryClass = ConfigurationUtil.getClassFromConfig(
conf, CRYPTO_FACTORY_CLASS_PROPERTY_NAME, EncryptionPropertiesFactory.class);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.hadoop;

import java.net.URI;
import java.nio.file.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.ParquetConfiguration;
import org.apache.parquet.crypto.EncryptionPropertiesFactory;
import org.apache.parquet.crypto.FileEncryptionProperties;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.util.ConfigurationUtil;

/**
 * Package-private helper that builds {@link FileEncryptionProperties} for a file being written, using the
 * {@link EncryptionPropertiesFactory} named in the supplied configuration.
 *
 * <p>Both overloads return {@code null} when no factory is configured, i.e. when
 * {@link EncryptionPropertiesFactory#loadFactory} returns {@code null}.
 */
final class EncryptionPropertiesHelper {

  /** Not instantiable: this class only exposes static helpers. */
  private EncryptionPropertiesHelper() {}

  /**
   * Creates the encryption properties for a file described by a {@link ParquetConfiguration}.
   *
   * @param fileParquetConfig configuration from which the encryption factory is loaded
   * @param tempFilePath path of the file being written; may be {@code null}
   * @param fileWriteContext write context handed through to the factory
   * @return the properties produced by the configured factory, or {@code null} if no factory is configured
   */
  static FileEncryptionProperties createEncryptionProperties(
      ParquetConfiguration fileParquetConfig, Path tempFilePath, WriteSupport.WriteContext fileWriteContext) {
    // Load the factory first and bail out early: the Hadoop Configuration and Hadoop Path below are
    // only created once a factory is actually configured.
    EncryptionPropertiesFactory cryptoFactory = EncryptionPropertiesFactory.loadFactory(fileParquetConfig);
    if (null == cryptoFactory) {
      return null;
    }

    Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(fileParquetConfig);
    // NOTE(review): java.nio.file.Path#toUri yields an absolute URI for the path's file system; confirm
    // this is the intended form for callers whose original path string was not a local file path.
    org.apache.hadoop.fs.Path hadoopPath =
        tempFilePath == null ? null : new org.apache.hadoop.fs.Path(tempFilePath.toUri());
    return cryptoFactory.getFileEncryptionProperties(hadoopConf, hadoopPath, fileWriteContext);
  }

  /**
   * Creates the encryption properties for a file described by a Hadoop {@link Configuration}.
   *
   * @param fileHadoopConfig Hadoop configuration from which the encryption factory is loaded
   * @param tempFilePath Hadoop path of the file being written, passed to the factory unchanged
   * @param fileWriteContext write context handed through to the factory
   * @return the properties produced by the configured factory, or {@code null} if no factory is configured
   */
  static FileEncryptionProperties createEncryptionProperties(
      Configuration fileHadoopConfig,
      org.apache.hadoop.fs.Path tempFilePath,
      WriteSupport.WriteContext fileWriteContext) {
    EncryptionPropertiesFactory cryptoFactory = EncryptionPropertiesFactory.loadFactory(fileHadoopConfig);
    if (null == cryptoFactory) {
      return null;
    }
    return cryptoFactory.getFileEncryptionProperties(fileHadoopConfig, tempFilePath, fileWriteContext);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;
import org.apache.parquet.crypto.EncryptionPropertiesFactory;
import org.apache.parquet.crypto.FileEncryptionProperties;
import org.apache.parquet.hadoop.ParquetFileWriter.Mode;
import org.apache.parquet.hadoop.api.WriteSupport;
Expand Down Expand Up @@ -571,10 +570,6 @@ public static synchronized MemoryManager getMemoryManager() {

public static FileEncryptionProperties createEncryptionProperties(
Configuration fileHadoopConfig, Path tempFilePath, WriteContext fileWriteContext) {
EncryptionPropertiesFactory cryptoFactory = EncryptionPropertiesFactory.loadFactory(fileHadoopConfig);
if (null == cryptoFactory) {
return null;
}
return cryptoFactory.getFileEncryptionProperties(fileHadoopConfig, tempFilePath, fileWriteContext);
return EncryptionPropertiesHelper.createEncryptionProperties(fileHadoopConfig, tempFilePath, fileWriteContext);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -387,9 +388,8 @@ public ParquetWriter(Path file, Configuration conf, WriteSupport<T> writeSupport
// attached.
if (encryptionProperties == null) {
String path = file == null ? null : file.getPath();
Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(conf);
encryptionProperties = ParquetOutputFormat.createEncryptionProperties(
hadoopConf, path == null ? null : new Path(path), writeContext);
encryptionProperties = EncryptionPropertiesHelper.createEncryptionProperties(
conf, path == null ? null : Paths.get(path), writeContext);
}

ParquetFileWriter fileWriter = new ParquetFileWriter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import static org.junit.Assert.assertEquals;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.ParquetConfiguration;
import org.apache.parquet.conf.PlainParquetConfiguration;
import org.apache.parquet.hadoop.util.ConfigurationUtil;
import org.junit.Test;

public class EncryptionPropertiesFactoryTest {
Expand All @@ -45,4 +48,24 @@ public void testLoadEncPropertiesFactory() {
encryptionProperties.getColumnProperties(SampleEncryptionPropertiesFactory.COL2),
SampleEncryptionPropertiesFactory.COL2_ENCR_PROPERTIES);
}

/**
 * Loads the sample factory through a {@link ParquetConfiguration} (rather than a Hadoop Configuration)
 * and checks the footer key and the per-column properties it produces.
 */
@Test
public void testLoadEncPropertiesFactoryParquetConfiguration() {
  ParquetConfiguration conf = new PlainParquetConfiguration();
  conf.set(
      EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME,
      "org.apache.parquet.crypto.SampleEncryptionPropertiesFactory");

  EncryptionPropertiesFactory encryptionPropertiesFactory = EncryptionPropertiesFactory.loadFactory(conf);
  FileEncryptionProperties encryptionProperties = encryptionPropertiesFactory.getFileEncryptionProperties(
      ConfigurationUtil.createHadoopConfiguration(conf), null, null);

  // JUnit's contract is (expected, actual); keeping that order makes failure messages label the
  // values correctly.
  assertArrayEquals(SampleEncryptionPropertiesFactory.FOOTER_KEY, encryptionProperties.getFooterKey());
  assertEquals(
      SampleEncryptionPropertiesFactory.COL1_ENCR_PROPERTIES,
      encryptionProperties.getColumnProperties(SampleEncryptionPropertiesFactory.COL1));
  assertEquals(
      SampleEncryptionPropertiesFactory.COL2_ENCR_PROPERTIES,
      encryptionProperties.getColumnProperties(SampleEncryptionPropertiesFactory.COL2));
}
}

0 comments on commit af4307b

Please sign in to comment.