Skip to content

Commit

Permalink
Merge pull request #4213 from IQSS/4185-worldmap-s3-thumbnail
Browse files Browse the repository at this point in the history
4185 worldmap s3 thumbnail
  • Loading branch information
kcondon authored Oct 20, 2017
2 parents 6b53fa1 + 75f8d3b commit 724b243
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 29 deletions.
19 changes: 15 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/FilePage.java
Original file line number Diff line number Diff line change
Expand Up @@ -502,17 +502,28 @@ public String save() {
return "";
}

// Cached result of the thumbnail-availability check; null means "not checked yet".
private Boolean thumbnailAvailable = null;

/**
 * Reports whether a thumbnail is available for the given file.
 * Optimized logic:
 * - check download permission here first (cached by the helper, so it's cheap);
 * - only then ask the file service whether the thumbnail actually exists
 *   (the service itself no longer checks download permissions).
 * The result is cached the first time the check is performed, because methods
 * referenced in "rendered=..." attributes are called *multiple* times while
 * the page is loading.
 *
 * @param fileMetadata metadata of the file whose thumbnail we are asked about
 * @return true if the user may download the file AND a thumbnail exists
 */
public boolean isThumbnailAvailable(FileMetadata fileMetadata) {
    if (thumbnailAvailable != null) {
        return thumbnailAvailable;
    }

    if (!fileDownloadHelper.canDownloadFile(fileMetadata)) {
        // no permission => never show a thumbnail, regardless of existence
        thumbnailAvailable = false;
    } else {
        thumbnailAvailable = datafileService.isThumbnailAvailable(fileMetadata.getDataFile());
    }

    return thumbnailAvailable;
}

private String returnToDatasetOnly(){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,11 @@ private static boolean generateWorldMapThumbnail(StorageIO<DataFile> storageIO,
return false;
}
} catch (FileNotFoundException fnfe) {
logger.fine("No .img file for this worldmap file yet; giving up.");
logger.fine("No .img file for this worldmap file yet; giving up. Original Error: " + fnfe);
return false;

} catch (IOException ioex) {
logger.warning("caught IOException trying to open an input stream for worldmap .img file (" + storageIO.getDataFile().getStorageIdentifier() + ")");
logger.warning("caught IOException trying to open an input stream for worldmap .img file (" + storageIO.getDataFile().getStorageIdentifier() + "). Original Error: " + ioex);
return false;
}

Expand Down
109 changes: 87 additions & 22 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,24 @@
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.util.FileUtil;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.Channel;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;

Expand Down Expand Up @@ -186,6 +192,7 @@ public void savePath(Path fileSystemPath) throws IOException {
File inputFile = fileSystemPath.toFile();
if (dvObject instanceof DataFile) {
s3.putObject(new PutObjectRequest(bucketName, key, inputFile));

newFileSize = inputFile.length();
} else {
throw new IOException("DvObject type other than datafile is not yet supported");
Expand All @@ -205,6 +212,25 @@ public void savePath(Path fileSystemPath) throws IOException {
setSize(newFileSize);
}

/**
* Implements the StorageIO saveInputStream() method.
* This implementation is somewhat problematic, because S3 cannot save an object of
* an unknown length. This effectively nullifies any benefits of streaming;
* as we cannot start saving until we have read the entire stream.
* One way of solving this would be to buffer the entire stream as byte[],
* in memory, then save it... Which of course would be limited by the amount
* of memory available, and thus would not work for streams larger than that.
So we have eventually decided to save the stream to a temp file, then
* save to S3. This is slower, but guaranteed to work on any size stream.
* An alternative we may want to consider is to not implement this method
* in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
* similarly to how we handle attempts to open OutputStreams, in this and the
* Swift driver.
*
* @param inputStream InputStream we want to save
* @param filesize the length of the stream, in bytes
* @throws IOException if anything goes wrong.
*/
@Override
public void saveInputStream(InputStream inputStream, Long filesize) throws IOException {
if (filesize == null || filesize < 0) {
Expand Down Expand Up @@ -235,24 +261,23 @@ public void saveInputStream(InputStream inputStream) throws IOException {
if (!this.canWrite()) {
open(DataAccessOption.WRITE_ACCESS);
}
//TODO? Copying over the object to a byte array is farily inefficient.
// We need the length of the data to upload inputStreams (see our putObject calls).
// There may be ways to work around this, see https://github.com/aws/aws-sdk-java/issues/474 to start.
// This is out of scope of creating the S3 driver and referenced in issue #4064!
byte[] bytes = IOUtils.toByteArray(inputStream);
long length = bytes.length;
ObjectMetadata metadata = new ObjectMetadata();
metadata.setContentLength(length);
String directoryString = FileUtil.getFilesTempDirectory();

Random rand = new Random();
Path tempPath = Paths.get(directoryString, Integer.toString(rand.nextInt(Integer.MAX_VALUE)));
File tempFile = createTempFile(tempPath, inputStream);

try {
s3.putObject(bucketName, key, inputStream, metadata);
s3.putObject(bucketName, key, tempFile);
} catch (SdkClientException ioex) {
String failureMsg = ioex.getMessage();
if (failureMsg == null) {
failureMsg = "S3AccessIO: Unknown exception occured while uploading a local file into S3 Storage.";
}

tempFile.delete();
throw new IOException(failureMsg);
}
tempFile.delete();
setSize(s3.getObjectMetadata(bucketName, key).getContentLength());
}

Expand Down Expand Up @@ -336,7 +361,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep
String destinationKey = getDestinationKey(auxItemTag);
try {
File inputFile = fileSystemPath.toFile();
s3.putObject(new PutObjectRequest(bucketName, destinationKey, inputFile));
s3.putObject(new PutObjectRequest(bucketName, destinationKey, inputFile));
} catch (AmazonClientException ase) {
logger.warning("Caught an AmazonServiceException in S3AccessIO.savePathAsAux(): " + ase.getMessage());
throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.");
Expand Down Expand Up @@ -367,31 +392,71 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon
}
}

/**
 * Implements the StorageIO saveInputStreamAsAux() method for a stream of
 * unknown length. S3 cannot save an object of unknown length, so the stream
 * is first written to a local temp file, which is then uploaded under the
 * auxiliary object's key. Slower than true streaming, but guaranteed to work
 * for any size stream without buffering it all in memory.
 *
 * @param inputStream InputStream we want to save
 * @param auxItemTag String representing this Auxiliary type ("extension")
 * @throws IOException if the temp file cannot be written or the S3 upload fails
 */
@Override
public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException {
    if (!this.canWrite()) {
        open(DataAccessOption.WRITE_ACCESS);
    }

    String directoryString = FileUtil.getFilesTempDirectory();

    Random rand = new Random();
    String pathNum = Integer.toString(rand.nextInt(Integer.MAX_VALUE));
    Path tempPath = Paths.get(directoryString, pathNum);
    File tempFile = createTempFile(tempPath, inputStream);

    String destinationKey = getDestinationKey(auxItemTag);

    try {
        s3.putObject(bucketName, destinationKey, tempFile);
    } catch (SdkClientException ioex) {
        String failureMsg = ioex.getMessage();
        if (failureMsg == null) {
            failureMsg = "S3AccessIO: Unknown exception occured while saving a local InputStream as S3Object";
        }
        throw new IOException(failureMsg);
    } finally {
        // always clean up the temp file, whether or not the upload succeeded
        tempFile.delete();
    }
}


/**
 * Helper method for supporting saving streams with unknown length to S3:
 * copies the given InputStream into a file at the given path, so the caller
 * can upload the file (S3 needs a known content length) and is responsible
 * for deleting it afterwards.
 *
 * @param path location for the temp file (must include a file name)
 * @param inputStream stream to copy; always closed by this method
 * @return the written temp file
 * @throws IOException if the copy fails
 */
private File createTempFile(Path path, InputStream inputStream) throws IOException {

    File targetFile = new File(path.toUri()); //File needs a name
    // try-with-resources guarantees the output stream is closed even if the
    // copy throws part-way through (the previous version leaked it on error)
    try (OutputStream outStream = new FileOutputStream(targetFile)) {
        byte[] buffer = new byte[8 * 1024];
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
            outStream.write(buffer, 0, bytesRead);
        }
    } finally {
        IOUtils.closeQuietly(inputStream);
    }
    return targetFile;
}

@Override
public List<String> listAuxObjects() throws IOException {
if (!this.canWrite()) {
Expand All @@ -405,7 +470,7 @@ public List<String> listAuxObjects() throws IOException {
List<S3ObjectSummary> storedAuxFilesSummary = storedAuxFilesList.getObjectSummaries();
try {
while (storedAuxFilesList.isTruncated()) {
logger.fine("S3 listAuxObjects: going to second page of list");
logger.fine("S3 listAuxObjects: going to next page of list");
storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList);
storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries());
}
Expand All @@ -416,7 +481,7 @@ public List<String> listAuxObjects() throws IOException {

for (S3ObjectSummary item : storedAuxFilesSummary) {
String destinationKey = item.getKey();
String fileName = destinationKey.substring(destinationKey.lastIndexOf("/"));
String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1);
logger.fine("S3 cached aux object fileName: " + fileName);
ret.add(fileName);
}
Expand Down
43 changes: 42 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,27 @@ public boolean canWrite() {
public abstract void savePath(Path fileSystemPath) throws IOException;

// same, for an InputStream:
/**
 * This method copies a local InputStream into this DataAccess location.
 * Note that the S3 driver implementation of this abstract method is problematic,
 * because S3 cannot save an object of an unknown length. This effectively
 * nullifies any benefits of streaming; as we cannot start saving until we
 * have read the entire stream.
 * One way of solving this would be to buffer the entire stream as byte[],
 * in memory, then save it... Which of course would be limited by the amount
 * of memory available, and thus would not work for streams larger than that.
 * So we have eventually decided to save the stream to a temp file, then
 * save to S3. This is slower, but guaranteed to work on any size stream.
 * An alternative we may want to consider is to not implement this method
 * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
 * similarly to how we handle attempts to open OutputStreams, in this and the
 * Swift driver.
 * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
 * The second overload takes the stream's length, allowing drivers that need
 * a known content length (S3) to skip the temp-file detour.
 *
 * @param inputStream InputStream we want to save
 * @throws IOException if anything goes wrong.
 */
public abstract void saveInputStream(InputStream inputStream) throws IOException;
public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException;

Expand Down Expand Up @@ -133,7 +154,27 @@ public boolean canWrite() {
// this method copies a local filesystem Path into this DataAccess Auxiliary location:
public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException;

/**
 * This method copies a local InputStream into this DataAccess Auxiliary location.
 * Note that the S3 driver implementation of this abstract method is problematic,
 * because S3 cannot save an object of an unknown length. This effectively
 * nullifies any benefits of streaming; as we cannot start saving until we
 * have read the entire stream.
 * One way of solving this would be to buffer the entire stream as byte[],
 * in memory, then save it... Which of course would be limited by the amount
 * of memory available, and thus would not work for streams larger than that.
 * So we have eventually decided to save the stream to a temp file, then
 * save to S3. This is slower, but guaranteed to work on any size stream.
 * An alternative we may want to consider is to not implement this method
 * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
 * similarly to how we handle attempts to open OutputStreams, in this and the
 * Swift driver.
 * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
 * The second overload takes the stream's length, allowing drivers that need
 * a known content length (S3) to skip the temp-file detour.
 *
 * @param inputStream InputStream we want to save
 * @param auxItemTag String representing this Auxiliary type ("extension")
 * @throws IOException if anything goes wrong.
 */
public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException;
public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException;

Expand Down

0 comments on commit 724b243

Please sign in to comment.