Skip to content

Commit

Permalink
Merge pull request #4213 from IQSS/4185-worldmap-s3-thumbnail
Browse files Browse the repository at this point in the history
4185 worldmap s3 thumbnail
  • Loading branch information
kcondon authored Oct 20, 2017
2 parents 6b53fa1 + 75f8d3b commit 724b243
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 29 deletions.
19 changes: 15 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/FilePage.java
Original file line number Diff line number Diff line change
Expand Up @@ -502,17 +502,28 @@ public String save() {
return "";
}

// Cached result of the thumbnail-availability check; null means "not checked yet".
private Boolean thumbnailAvailable = null;

/**
 * Reports whether a thumbnail is available for the given file.
 * Optimized logic:
 * - check download permission here first (cached by the helper, so it's cheap);
 * - only then ask the file service whether the thumbnail actually exists
 *   (the service itself no longer checks download permissions).
 * The result is cached the first time the check is performed, because methods
 * referenced in "rendered=..." attributes are called *multiple* times while
 * the page is loading.
 *
 * @param fileMetadata metadata of the file whose thumbnail we are asked about
 * @return true if the user may download the file AND a thumbnail exists
 */
public boolean isThumbnailAvailable(FileMetadata fileMetadata) {
    if (thumbnailAvailable != null) {
        return thumbnailAvailable;
    }

    if (!fileDownloadHelper.canDownloadFile(fileMetadata)) {
        // no permission => never show a thumbnail, regardless of existence
        thumbnailAvailable = false;
    } else {
        thumbnailAvailable = datafileService.isThumbnailAvailable(fileMetadata.getDataFile());
    }

    return thumbnailAvailable;
}

private String returnToDatasetOnly(){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,11 @@ private static boolean generateWorldMapThumbnail(StorageIO<DataFile> storageIO,
return false;
}
} catch (FileNotFoundException fnfe) {
logger.fine("No .img file for this worldmap file yet; giving up.");
logger.fine("No .img file for this worldmap file yet; giving up. Original Error: " + fnfe);
return false;

} catch (IOException ioex) {
logger.warning("caught IOException trying to open an input stream for worldmap .img file (" + storageIO.getDataFile().getStorageIdentifier() + ")");
logger.warning("caught IOException trying to open an input stream for worldmap .img file (" + storageIO.getDataFile().getStorageIdentifier() + "). Original Error: " + ioex);
return false;
}

Expand Down
109 changes: 87 additions & 22 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,24 @@
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.util.FileUtil;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.Channel;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;

Expand Down Expand Up @@ -186,6 +192,7 @@ public void savePath(Path fileSystemPath) throws IOException {
File inputFile = fileSystemPath.toFile();
if (dvObject instanceof DataFile) {
s3.putObject(new PutObjectRequest(bucketName, key, inputFile));

newFileSize = inputFile.length();
} else {
throw new IOException("DvObject type other than datafile is not yet supported");
Expand All @@ -205,6 +212,25 @@ public void savePath(Path fileSystemPath) throws IOException {
setSize(newFileSize);
}

/**
* Implements the StorageIO saveInputStream() method.
* This implementation is somewhat problematic, because S3 cannot save an object of
* an unknown length. This effectively nullifies any benefits of streaming;
* as we cannot start saving until we have read the entire stream.
* One way of solving this would be to buffer the entire stream as byte[],
* in memory, then save it... Which of course would be limited by the amount
* of memory available, and thus would not work for streams larger than that.
So we have eventually decided to save the stream to a temp file, then
* save to S3. This is slower, but guaranteed to work on any size stream.
* An alternative we may want to consider is to not implement this method
* in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
* similarly to how we handle attempts to open OutputStreams, in this and the
* Swift driver.
*
* @param inputStream InputStream we want to save
* @param filesize the length of the stream, in bytes
* @throws IOException if anything goes wrong.
*/
@Override
public void saveInputStream(InputStream inputStream, Long filesize) throws IOException {
if (filesize == null || filesize < 0) {
Expand Down Expand Up @@ -235,24 +261,23 @@ public void saveInputStream(InputStream inputStream) throws IOException {
if (!this.canWrite()) {
open(DataAccessOption.WRITE_ACCESS);
}
//TODO? Copying over the object to a byte array is farily inefficient.
// We need the length of the data to upload inputStreams (see our putObject calls).
// There may be ways to work around this, see https://github.com/aws/aws-sdk-java/issues/474 to start.
// This is out of scope of creating the S3 driver and referenced in issue #4064!
byte[] bytes = IOUtils.toByteArray(inputStream);
long length = bytes.length;
ObjectMetadata metadata = new ObjectMetadata();
metadata.setContentLength(length);
String directoryString = FileUtil.getFilesTempDirectory();

Random rand = new Random();
Path tempPath = Paths.get(directoryString, Integer.toString(rand.nextInt(Integer.MAX_VALUE)));
File tempFile = createTempFile(tempPath, inputStream);

try {
s3.putObject(bucketName, key, inputStream, metadata);
s3.putObject(bucketName, key, tempFile);
} catch (SdkClientException ioex) {
String failureMsg = ioex.getMessage();
if (failureMsg == null) {
failureMsg = "S3AccessIO: Unknown exception occured while uploading a local file into S3 Storage.";
}

tempFile.delete();
throw new IOException(failureMsg);
}
tempFile.delete();
setSize(s3.getObjectMetadata(bucketName, key).getContentLength());
}

Expand Down Expand Up @@ -336,7 +361,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep
String destinationKey = getDestinationKey(auxItemTag);
try {
File inputFile = fileSystemPath.toFile();
s3.putObject(new PutObjectRequest(bucketName, destinationKey, inputFile));
s3.putObject(new PutObjectRequest(bucketName, destinationKey, inputFile));
} catch (AmazonClientException ase) {
logger.warning("Caught an AmazonServiceException in S3AccessIO.savePathAsAux(): " + ase.getMessage());
throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.");
Expand Down Expand Up @@ -367,31 +392,71 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon
}
}

/**
 * Implements the StorageIO saveInputStreamAsAux() method for a stream of
 * unknown length. S3 cannot save an object of unknown length, so the stream
 * is first written to a local temp file, which is then uploaded under the
 * auxiliary object's key. Slower than true streaming, but guaranteed to work
 * for any size stream without buffering it all in memory.
 *
 * @param inputStream InputStream we want to save
 * @param auxItemTag String representing this Auxiliary type ("extension")
 * @throws IOException if the temp file cannot be written or the S3 upload fails
 */
@Override
public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException {
    if (!this.canWrite()) {
        open(DataAccessOption.WRITE_ACCESS);
    }

    String directoryString = FileUtil.getFilesTempDirectory();

    Random rand = new Random();
    String pathNum = Integer.toString(rand.nextInt(Integer.MAX_VALUE));
    Path tempPath = Paths.get(directoryString, pathNum);
    File tempFile = createTempFile(tempPath, inputStream);

    String destinationKey = getDestinationKey(auxItemTag);

    try {
        s3.putObject(bucketName, destinationKey, tempFile);
    } catch (SdkClientException ioex) {
        String failureMsg = ioex.getMessage();
        if (failureMsg == null) {
            failureMsg = "S3AccessIO: Unknown exception occured while saving a local InputStream as S3Object";
        }
        throw new IOException(failureMsg);
    } finally {
        // always clean up the temp file, whether or not the upload succeeded
        tempFile.delete();
    }
}


/**
 * Helper method for supporting saving streams with unknown length to S3:
 * copies the given InputStream into a file at the given path, so the caller
 * can upload the file (S3 needs a known content length) and is responsible
 * for deleting it afterwards.
 *
 * @param path location for the temp file (must include a file name)
 * @param inputStream stream to copy; always closed by this method
 * @return the written temp file
 * @throws IOException if the copy fails
 */
private File createTempFile(Path path, InputStream inputStream) throws IOException {

    File targetFile = new File(path.toUri()); //File needs a name
    // try-with-resources guarantees the output stream is closed even if the
    // copy throws part-way through (the previous version leaked it on error)
    try (OutputStream outStream = new FileOutputStream(targetFile)) {
        byte[] buffer = new byte[8 * 1024];
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
            outStream.write(buffer, 0, bytesRead);
        }
    } finally {
        IOUtils.closeQuietly(inputStream);
    }
    return targetFile;
}

@Override
public List<String> listAuxObjects() throws IOException {
if (!this.canWrite()) {
Expand All @@ -405,7 +470,7 @@ public List<String> listAuxObjects() throws IOException {
List<S3ObjectSummary> storedAuxFilesSummary = storedAuxFilesList.getObjectSummaries();
try {
while (storedAuxFilesList.isTruncated()) {
logger.fine("S3 listAuxObjects: going to second page of list");
logger.fine("S3 listAuxObjects: going to next page of list");
storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList);
storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries());
}
Expand All @@ -416,7 +481,7 @@ public List<String> listAuxObjects() throws IOException {

for (S3ObjectSummary item : storedAuxFilesSummary) {
String destinationKey = item.getKey();
String fileName = destinationKey.substring(destinationKey.lastIndexOf("/"));
String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1);
logger.fine("S3 cached aux object fileName: " + fileName);
ret.add(fileName);
}
Expand Down
43 changes: 42 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,27 @@ public boolean canWrite() {
public abstract void savePath(Path fileSystemPath) throws IOException;

// same, for an InputStream:
/**
 * This method copies a local InputStream into this DataAccess location.
 * Note that the S3 driver implementation of this abstract method is problematic,
 * because S3 cannot save an object of an unknown length. This effectively
 * nullifies any benefits of streaming; as we cannot start saving until we
 * have read the entire stream.
 * One way of solving this would be to buffer the entire stream as byte[],
 * in memory, then save it... Which of course would be limited by the amount
 * of memory available, and thus would not work for streams larger than that.
 * So we have eventually decided to save the stream to a temp file, then
 * save to S3. This is slower, but guaranteed to work on any size stream.
 * An alternative we may want to consider is to not implement this method
 * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
 * similarly to how we handle attempts to open OutputStreams, in this and the
 * Swift driver.
 * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
 * The second overload takes the stream's length, allowing drivers that need
 * a known content length (S3) to skip the temp-file detour.
 *
 * @param inputStream InputStream we want to save
 * @throws IOException if anything goes wrong.
 */
public abstract void saveInputStream(InputStream inputStream) throws IOException;
public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException;

Expand Down Expand Up @@ -133,7 +154,27 @@ public boolean canWrite() {
// this method copies a local filesystem Path into this DataAccess Auxiliary location:
public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException;

/**
 * This method copies a local InputStream into this DataAccess Auxiliary location.
 * Note that the S3 driver implementation of this abstract method is problematic,
 * because S3 cannot save an object of an unknown length. This effectively
 * nullifies any benefits of streaming; as we cannot start saving until we
 * have read the entire stream.
 * One way of solving this would be to buffer the entire stream as byte[],
 * in memory, then save it... Which of course would be limited by the amount
 * of memory available, and thus would not work for streams larger than that.
 * So we have eventually decided to save the stream to a temp file, then
 * save to S3. This is slower, but guaranteed to work on any size stream.
 * An alternative we may want to consider is to not implement this method
 * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
 * similarly to how we handle attempts to open OutputStreams, in this and the
 * Swift driver.
 * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
 * The second overload takes the stream's length, allowing drivers that need
 * a known content length (S3) to skip the temp-file detour.
 *
 * @param inputStream InputStream we want to save
 * @param auxItemTag String representing this Auxiliary type ("extension")
 * @throws IOException if anything goes wrong.
 */
public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException;
public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException;

Expand Down

0 comments on commit 724b243

Please sign in to comment.