From 5d1ae8619952ea2d0eac37a38bed374533a6c2e6 Mon Sep 17 00:00:00 2001 From: Aday Bujeda Date: Tue, 19 Jul 2022 09:26:54 +0100 Subject: [PATCH] File Detection - Add support for files without extensions --- ...8740-file-recognition-based-on-filename.md | 9 +++++ .../harvard/iq/dataverse/util/FileUtil.java | 40 ++++++++++--------- .../MimeTypeDetectionByFileName.properties | 4 ++ .../propertyFiles/MimeTypeDisplay.properties | 2 + .../propertyFiles/MimeTypeFacets.properties | 2 + .../iq/dataverse/util/FileUtilTest.java | 16 +++++++- src/test/resources/fileutil/Makefile | 1 + 7 files changed, 54 insertions(+), 20 deletions(-) create mode 100644 doc/release-notes/8740-file-recognition-based-on-filename.md create mode 100644 src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties create mode 100644 src/test/resources/fileutil/Makefile diff --git a/doc/release-notes/8740-file-recognition-based-on-filename.md b/doc/release-notes/8740-file-recognition-based-on-filename.md new file mode 100644 index 00000000000..05160a5c198 --- /dev/null +++ b/doc/release-notes/8740-file-recognition-based-on-filename.md @@ -0,0 +1,9 @@ +### File types detection +File types are now detected based on the filename when the file has no extension. + +The following filenames are now detected: + + - Makefile=text/x-makefile + - Snakemake=text/x-snakemake + - Dockerfile=application/x-docker-file + - Vagrantfile=application/x-vagrant-file \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 64dadc54a4a..b554e6c9123 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -28,7 +28,6 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Embargo; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; @@ -53,7 +52,7 @@ import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatLink; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableCellAlignRight; import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableRow; -import java.awt.image.BufferedImage; + import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; @@ -76,7 +75,6 @@ import java.text.MessageFormat; import java.text.SimpleDateFormat; import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.Map; import java.util.MissingResourceException; import java.util.ArrayList; @@ -90,11 +88,6 @@ import javax.activation.MimetypesFileTypeMap; import javax.ejb.EJBException; import javax.enterprise.inject.spi.CDI; -import javax.faces.application.FacesMessage; -import javax.faces.component.UIComponent; -import javax.faces.component.UIInput; -import javax.faces.context.FacesContext; -import javax.faces.validator.ValidatorException; import javax.json.JsonArray; import javax.json.JsonObject; import javax.xml.stream.XMLStreamConstants; @@ -108,7 +101,6 @@ import java.util.zip.ZipInputStream; import org.apache.commons.io.FilenameUtils; -import com.amazonaws.AmazonServiceException; import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -487,8 +479,8 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // step 4: // Additional processing; if we haven't gotten much useful information // back from Jhove, we'll try and make an educated guess based on - // the file extension: - + // the file name and extension: + if ( fileExtension != null) { logger.fine("fileExtension="+fileExtension); @@ -496,13 +488,18 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) { fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension); } else { - fileType = determineFileTypeByExtension(fileName); + fileType = determineFileTypeByNameAndExtension(fileName); } - + logger.fine("mime type recognized by extension: "+fileType); } } else { logger.fine("fileExtension is null"); + String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName); + if(!StringUtil.isEmpty(fileTypeByName)) { + logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName)); + fileType = fileTypeByName; + } } // step 5: @@ -552,7 +549,7 @@ public static String determineFileType(File f, String fileName) throws IOExcepti return fileType; } - public static String determineFileTypeByExtension(String fileName) { + public static String determineFileTypeByNameAndExtension(String fileName) { String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); if (mimetypesFileTypeMapResult != null) { @@ -567,14 +564,19 @@ public static String determineFileTypeByExtension(String fileName) { } public static String lookupFileTypeFromPropertiesFile(String fileName) { - String fileExtension = FilenameUtils.getExtension(fileName); + String fileKey = FilenameUtils.getExtension(fileName); String propertyFileName = "MimeTypeDetectionByFileExtension"; + if(fileKey == null || fileKey.isEmpty()) { + fileKey = fileName; + propertyFileName = "MimeTypeDetectionByFileName"; + + } String propertyFileNameOnDisk = propertyFileName + ".properties"; try { - logger.fine("checking " + propertyFileNameOnDisk + " for file extension " + fileExtension); - return BundleUtil.getStringFromPropertyFile(fileExtension, propertyFileName); + logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey); + return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName); } catch (MissingResourceException ex) { - logger.info(fileExtension + " is a file extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); + logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); return null; } } @@ -1145,7 +1147,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input } else { // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = determineFileTypeByExtension(fileName); + String type = determineFileTypeByNameAndExtension(fileName); if (!StringUtils.isBlank(type)) { //Use rules for deciding when to trust browser supplied type if (useRecognizedType(finalType, type)) { diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties new file mode 100644 index 00000000000..70b0c4e371e --- /dev/null +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties @@ -0,0 +1,4 @@ +Makefile=text/x-makefile +Snakemake=text/x-snakemake +Dockerfile=application/x-docker-file +Vagrantfile=application/x-vagrant-file diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index ebc07bda0f6..928419c0405 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -219,5 +219,7 @@ video/webm=WebM Video text/xml-graphml=GraphML Network Data # Other application/octet-stream=Unknown +application/x-docker-file=Docker Image File +application/x-vagrant-file=Vagrant Image File # Dataverse-specific application/vnd.dataverse.file-package=Dataverse Package diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index 931624b5489..2cac63a7ad0 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -76,6 +76,8 @@ text/x-ruby-script=Code text/x-dagman=Code text/x-makefile=Code text/x-snakemake=Code +application/x-docker-file=Code +application/x-vagrant-file=Code # Ingested text/tab-separated-values=Tabular Data # Data diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 141e97b9b9b..226c677ed0f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -303,7 +303,7 @@ public void testRescaleImage() throws IOException { }*/ @Test - public void testDetermineFileType() { + public void testDetermineFileTypeByExtension() { File file = new File("src/main/webapp/resources/images/cc0.png"); if (file.exists()) { try { @@ -316,6 +316,20 @@ public void testDetermineFileType() { } } + @Test + public void testDetermineFileTypeByName() { + File file = new File("src/test/resources/fileutil/Makefile"); + if (file.exists()) { + try { + assertEquals("text/x-makefile", FileUtil.determineFileType(file, "Makefile")); + } catch (IOException ex) { + Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); + } + } else { + fail("File does not exist: " + file.toPath().toString()); + } + } + // isThumbnailSuppported() has been moved from DataFileService to FileUtil: /** * Expect that {@code null}, a DataFile without content type and a DataFile diff --git a/src/test/resources/fileutil/Makefile b/src/test/resources/fileutil/Makefile new file mode 100644 index 00000000000..8ab4f33d6e5 --- /dev/null +++ b/src/test/resources/fileutil/Makefile @@ -0,0 +1 @@ +To test file type recognition from file name \ No newline at end of file