Skip to content

Commit 2fb9106

Browse files
committed
Check for globus file checksum before publishing
1 parent 9c6e851 commit 2fb9106

File tree

1 file changed

+94
-81
lines changed

1 file changed

+94
-81
lines changed

src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java

Lines changed: 94 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
package edu.harvard.iq.dataverse.util;
2222

2323
import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX;
24+
25+
import com.amazonaws.services.s3.model.S3ObjectSummary;
2426
import edu.harvard.iq.dataverse.DataFile;
2527
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
2628
import edu.harvard.iq.dataverse.DataFileServiceBean;
@@ -1706,102 +1708,113 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) {
17061708
}
17071709

17081710
public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
1709-
DataFile.ChecksumType checksumType = dataFile.getChecksumType();
1710-
1711-
logger.info(checksumType.toString());
1712-
if (checksumType == null) {
1713-
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString()));
1714-
logger.log(Level.INFO, info);
1715-
throw new IOException(info);
1716-
}
1711+
String recalculatedChecksum = null;
1712+
if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) {
1713+
for (S3ObjectSummary s3ObjectSummary : dataFile.getStorageIO().listAuxObjects("")) {
1714+
recalculatedChecksum = s3ObjectSummary.getETag();
1715+
if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) {
1716+
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString()));
1717+
logger.log(Level.INFO, info);
1718+
throw new IOException(info);
1719+
}
1720+
}
1721+
} else {
1722+
DataFile.ChecksumType checksumType = dataFile.getChecksumType();
17171723

1718-
StorageIO<DataFile> storage = dataFile.getStorageIO();
1719-
InputStream in = null;
1720-
1721-
try {
1722-
storage.open(DataAccessOption.READ_ACCESS);
1723-
1724-
if (!dataFile.isTabularData()) {
1725-
logger.info("It is not tabular");
1726-
in = storage.getInputStream();
1727-
} else {
1728-
// if this is a tabular file, read the preserved original "auxiliary file"
1729-
// instead:
1730-
in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
1724+
logger.info(checksumType.toString());
1725+
if (checksumType == null) {
1726+
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString()));
1727+
logger.log(Level.INFO, info);
1728+
throw new IOException(info);
17311729
}
1732-
} catch (IOException ioex) {
1733-
in = null;
1734-
}
17351730

1736-
if (in == null) {
1737-
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
1738-
logger.log(Level.INFO, info);
1739-
throw new IOException(info);
1740-
}
1731+
StorageIO<DataFile> storage = dataFile.getStorageIO();
1732+
InputStream in = null;
17411733

1742-
String recalculatedChecksum = null;
1743-
try {
1744-
logger.info("Before calculating checksum");
1745-
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
1746-
logger.info("Checksum:" + recalculatedChecksum);
1747-
} catch (RuntimeException rte) {
1748-
recalculatedChecksum = null;
1749-
} finally {
1750-
IOUtils.closeQuietly(in);
1751-
}
1752-
1753-
if (recalculatedChecksum == null) {
1754-
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString()));
1755-
logger.log(Level.INFO, info);
1756-
throw new IOException(info);
1757-
}
1758-
1759-
// TODO? What should we do if the datafile does not have a non-null checksum?
1760-
// Should we fail, or should we assume that the recalculated checksum
1761-
// is correct, and populate the checksumValue field with it?
1762-
if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) {
1763-
// There's one possible condition that is 100% recoverable and can
1764-
// be automatically fixed (issue #6660):
1765-
logger.info(dataFile.getChecksumValue());
1766-
logger.info(recalculatedChecksum);
1767-
logger.info("Checksums are not equal");
1768-
boolean fixed = false;
1769-
if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) {
1770-
// try again, see if the .orig file happens to be there:
1771-
try {
1734+
try {
1735+
storage.open(DataAccessOption.READ_ACCESS);
1736+
1737+
if (!dataFile.isTabularData()) {
1738+
logger.info("It is not tabular");
1739+
in = storage.getInputStream();
1740+
} else {
1741+
// if this is a tabular file, read the preserved original "auxiliary file"
1742+
// instead:
17721743
in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
1773-
} catch (IOException ioex) {
1774-
in = null;
17751744
}
1776-
if (in != null) {
1745+
} catch (IOException ioex) {
1746+
in = null;
1747+
}
1748+
1749+
if (in == null) {
1750+
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
1751+
logger.log(Level.INFO, info);
1752+
throw new IOException(info);
1753+
}
1754+
1755+
try {
1756+
logger.info("Before calculating checksum");
1757+
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
1758+
logger.info("Checksum:" + recalculatedChecksum);
1759+
} catch (RuntimeException rte) {
1760+
recalculatedChecksum = null;
1761+
} finally {
1762+
IOUtils.closeQuietly(in);
1763+
}
1764+
1765+
if (recalculatedChecksum == null) {
1766+
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString()));
1767+
logger.log(Level.INFO, info);
1768+
throw new IOException(info);
1769+
}
1770+
1771+
// TODO? What should we do if the datafile does not have a non-null checksum?
1772+
// Should we fail, or should we assume that the recalculated checksum
1773+
// is correct, and populate the checksumValue field with it?
1774+
if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) {
1775+
// There's one possible condition that is 100% recoverable and can
1776+
// be automatically fixed (issue #6660):
1777+
logger.info(dataFile.getChecksumValue());
1778+
logger.info(recalculatedChecksum);
1779+
logger.info("Checksums are not equal");
1780+
boolean fixed = false;
1781+
if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) {
1782+
// try again, see if the .orig file happens to be there:
17771783
try {
1778-
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
1779-
} catch (RuntimeException rte) {
1780-
recalculatedChecksum = null;
1781-
} finally {
1782-
IOUtils.closeQuietly(in);
1784+
in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
1785+
} catch (IOException ioex) {
1786+
in = null;
17831787
}
1784-
// try again:
1785-
if (recalculatedChecksum.equals(dataFile.getChecksumValue())) {
1786-
fixed = true;
1788+
if (in != null) {
17871789
try {
1788-
storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
1789-
} catch (IOException ioex) {
1790-
fixed = false;
1790+
recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
1791+
} catch (RuntimeException rte) {
1792+
recalculatedChecksum = null;
1793+
} finally {
1794+
IOUtils.closeQuietly(in);
1795+
}
1796+
// try again:
1797+
if (recalculatedChecksum.equals(dataFile.getChecksumValue())) {
1798+
fixed = true;
1799+
try {
1800+
storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
1801+
} catch (IOException ioex) {
1802+
fixed = false;
1803+
}
17911804
}
17921805
}
17931806
}
1794-
}
1795-
1796-
if (!fixed) {
1797-
logger.info("checksum cannot be fixed");
1798-
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString()));
1799-
logger.log(Level.INFO, info);
1800-
throw new IOException(info);
1807+
1808+
if (!fixed) {
1809+
logger.info("checksum cannot be fixed");
1810+
String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString()));
1811+
logger.log(Level.INFO, info);
1812+
throw new IOException(info);
1813+
}
18011814
}
18021815
}
1803-
18041816
logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum});
1817+
18051818
}
18061819

18071820
public static String getStorageIdentifierFromLocation(String location) {

0 commit comments

Comments
 (0)