Skip to content

Commit 575e3b5

Browse files
committed
File Detection - Add support for files without extensions
1 parent a324b51 commit 575e3b5

File tree

7 files changed

+59
-30
lines changed

7 files changed

+59
-30
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
### File type detection
2+
File types are now detected based on the filename when the file has no extension.
3+
4+
The following filenames are now detected:
5+
6+
- Makefile=text/x-makefile
7+
- Snakemake=text/x-snakemake
8+
- Dockerfile=application/x-docker-file
9+
- Vagrantfile=application/x-vagrant-file

src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java

+22-29
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import edu.harvard.iq.dataverse.DatasetVersion;
2929
import edu.harvard.iq.dataverse.Embargo;
3030
import edu.harvard.iq.dataverse.FileMetadata;
31-
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
3231
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
3332
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
3433
import edu.harvard.iq.dataverse.dataaccess.S3AccessIO;
@@ -53,7 +52,7 @@
5352
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatLink;
5453
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableCellAlignRight;
5554
import static edu.harvard.iq.dataverse.util.xml.html.HtmlFormatUtil.formatTableRow;
56-
import java.awt.image.BufferedImage;
55+
5756
import java.io.BufferedInputStream;
5857
import java.io.File;
5958
import java.io.FileInputStream;
@@ -76,7 +75,6 @@
7675
import java.text.MessageFormat;
7776
import java.text.SimpleDateFormat;
7877
import java.time.LocalDate;
79-
import java.time.format.DateTimeFormatter;
8078
import java.util.Map;
8179
import java.util.MissingResourceException;
8280
import java.util.ArrayList;
@@ -90,11 +88,6 @@
9088
import javax.activation.MimetypesFileTypeMap;
9189
import javax.ejb.EJBException;
9290
import javax.enterprise.inject.spi.CDI;
93-
import javax.faces.application.FacesMessage;
94-
import javax.faces.component.UIComponent;
95-
import javax.faces.component.UIInput;
96-
import javax.faces.context.FacesContext;
97-
import javax.faces.validator.ValidatorException;
9891
import javax.json.JsonArray;
9992
import javax.json.JsonObject;
10093
import javax.xml.stream.XMLStreamConstants;
@@ -108,7 +101,6 @@
108101
import java.util.zip.ZipInputStream;
109102
import org.apache.commons.io.FilenameUtils;
110103

111-
import com.amazonaws.AmazonServiceException;
112104
import edu.harvard.iq.dataverse.dataaccess.DataAccessOption;
113105
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
114106
import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker;
@@ -487,22 +479,18 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
487479
// step 4:
488480
// Additional processing; if we haven't gotten much useful information
489481
// back from Jhove, we'll try and make an educated guess based on
490-
// the file extension:
491-
492-
if ( fileExtension != null) {
493-
logger.fine("fileExtension="+fileExtension);
482+
// the file name and extension:
494483

495-
if (fileType == null || fileType.startsWith("text/plain") || "application/octet-stream".equals(fileType)) {
496-
if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) {
497-
fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension);
498-
} else {
499-
fileType = determineFileTypeByExtension(fileName);
500-
}
501-
502-
logger.fine("mime type recognized by extension: "+fileType);
484+
logger.fine("fileName="+fileName);
485+
486+
if (fileType == null || fileType.startsWith("text/plain") || "application/octet-stream".equals(fileType)) {
487+
if (fileExtension != null && fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) {
488+
fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension);
489+
} else {
490+
fileType = determineFileTypeByNameAndExtension(fileName);
503491
}
504-
} else {
505-
logger.fine("fileExtension is null");
492+
493+
logger.fine("mime type recognized by name/extension: "+fileType);
506494
}
507495

508496
// step 5:
@@ -552,7 +540,7 @@ public static String determineFileType(File f, String fileName) throws IOExcepti
552540
return fileType;
553541
}
554542

555-
public static String determineFileTypeByExtension(String fileName) {
543+
public static String determineFileTypeByNameAndExtension(String fileName) {
556544
String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName);
557545
logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult);
558546
if (mimetypesFileTypeMapResult != null) {
@@ -567,14 +555,19 @@ public static String determineFileTypeByExtension(String fileName) {
567555
}
568556

569557
public static String lookupFileTypeFromPropertiesFile(String fileName) {
570-
String fileExtension = FilenameUtils.getExtension(fileName);
558+
String fileKey = FilenameUtils.getExtension(fileName);
571559
String propertyFileName = "MimeTypeDetectionByFileExtension";
560+
if(fileKey == null || fileKey.isEmpty()) {
561+
fileKey = fileName;
562+
propertyFileName = "MimeTypeDetectionByFileName";
563+
564+
}
572565
String propertyFileNameOnDisk = propertyFileName + ".properties";
573566
try {
574-
logger.fine("checking " + propertyFileNameOnDisk + " for file extension " + fileExtension);
575-
return BundleUtil.getStringFromPropertyFile(fileExtension, propertyFileName);
567+
logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey);
568+
return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName);
576569
} catch (MissingResourceException ex) {
577-
logger.info(fileExtension + " is a file extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file.");
570+
logger.info(fileKey + " is a file extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file.");
578571
return null;
579572
}
580573
}
@@ -1145,7 +1138,7 @@ public static CreateDataFileResult createDataFiles(DatasetVersion version, Input
11451138
} else {
11461139
// Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied
11471140
finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
1148-
String type = determineFileTypeByExtension(fileName);
1141+
String type = determineFileTypeByNameAndExtension(fileName);
11491142
if (!StringUtils.isBlank(type)) {
11501143
//Use rules for deciding when to trust browser supplied type
11511144
if (useRecognizedType(finalType, type)) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Makefile=text/x-makefile
2+
Snakemake=text/x-snakemake
3+
Dockerfile=application/x-docker-file
4+
Vagrantfile=application/x-vagrant-file

src/main/java/propertyFiles/MimeTypeDisplay.properties

+4
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ application/x-sas-syntax=SAS Syntax
7373
type/x-r-syntax=R Syntax
7474
application/vnd.wolfram.mathematica.package=Wolfram Mathematica Code
7575
application/vnd.wolfram.mathematica=Wolfram Mathematica Code
76+
text/x-makefile=Makefile Script
77+
text/x-snakemake=Snakemake Workflow
7678
# Ingested Tabular Data
7779
text/tab-separated-values=Tab-Delimited
7880
# RawData
@@ -211,5 +213,7 @@ video/webm=WebM Video
211213
text/xml-graphml=GraphML Network Data
212214
# Other
213215
application/octet-stream=Unknown
216+
application/x-docker-file=Docker Image File
217+
application/x-vagrant-file=Vagrant Image File
214218
# Dataverse-specific
215219
application/vnd.dataverse.file-package=Dataverse Package

src/main/java/propertyFiles/MimeTypeFacets.properties

+4
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ type/x-r-syntax=Code
6868
application/postscript=Code
6969
application/vnd.wolfram.mathematica.package=Code
7070
application/vnd.wolfram.mathematica=Code
71+
text/x-makefile=Code
72+
text/x-snakemake=Code
73+
application/x-docker-file=Code
74+
application/x-vagrant-file=Code
7175
# Ingested
7276
text/tab-separated-values=Tabular Data
7377
# Data

src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java

+15-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ public void testRescaleImage() throws IOException {
303303
}*/
304304

305305
@Test
306-
public void testDetermineFileType() {
306+
public void testDetermineFileTypeByExtension() {
307307
File file = new File("src/main/webapp/resources/images/cc0.png");
308308
if (file.exists()) {
309309
try {
@@ -316,6 +316,20 @@ public void testDetermineFileType() {
316316
}
317317
}
318318

319+
@Test
320+
public void testDetermineFileTypeByName() {
321+
File file = new File("src/test/resources/fileutil/Makefile");
322+
if (file.exists()) {
323+
try {
324+
assertEquals("text/x-makefile", FileUtil.determineFileType(file, "Makefile"));
325+
} catch (IOException ex) {
326+
Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex);
327+
}
328+
} else {
329+
fail("File does not exist: " + file.toPath().toString());
330+
}
331+
}
332+
319333
// isThumbnailSupported() has been moved from DataFileService to FileUtil:
320334
/**
321335
* Expect that {@code null}, a DataFile without content type and a DataFile

src/test/resources/fileutil/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
To test file type recognition from file name

0 commit comments

Comments
 (0)