|
21 | 21 | package edu.harvard.iq.dataverse.util;
|
22 | 22 |
|
23 | 23 | import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX;
|
| 24 | + |
| 25 | +import com.amazonaws.services.s3.model.S3ObjectSummary; |
24 | 26 | import edu.harvard.iq.dataverse.DataFile;
|
25 | 27 | import edu.harvard.iq.dataverse.DataFile.ChecksumType;
|
26 | 28 | import edu.harvard.iq.dataverse.DataFileServiceBean;
|
@@ -1706,102 +1708,113 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) {
|
1706 | 1708 | }
|
1707 | 1709 |
|
1708 | 1710 | public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
|
1709 |
| - DataFile.ChecksumType checksumType = dataFile.getChecksumType(); |
1710 |
| - |
1711 |
| - logger.info(checksumType.toString()); |
1712 |
| - if (checksumType == null) { |
1713 |
| - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); |
1714 |
| - logger.log(Level.INFO, info); |
1715 |
| - throw new IOException(info); |
1716 |
| - } |
| 1711 | + String recalculatedChecksum = null; |
| 1712 | + if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) { |
| 1713 | + for (S3ObjectSummary s3ObjectSummary : dataFile.getStorageIO().listAuxObjects("")) { |
| 1714 | + recalculatedChecksum = s3ObjectSummary.getETag(); |
| 1715 | + if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { |
| 1716 | + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); |
| 1717 | + logger.log(Level.INFO, info); |
| 1718 | + throw new IOException(info); |
| 1719 | + } |
| 1720 | + } |
| 1721 | + } else { |
| 1722 | + DataFile.ChecksumType checksumType = dataFile.getChecksumType(); |
1717 | 1723 |
|
1718 |
| - StorageIO<DataFile> storage = dataFile.getStorageIO(); |
1719 |
| - InputStream in = null; |
1720 |
| - |
1721 |
| - try { |
1722 |
| - storage.open(DataAccessOption.READ_ACCESS); |
1723 |
| - |
1724 |
| - if (!dataFile.isTabularData()) { |
1725 |
| - logger.info("It is not tabular"); |
1726 |
| - in = storage.getInputStream(); |
1727 |
| - } else { |
1728 |
| - // if this is a tabular file, read the preserved original "auxiliary file" |
1729 |
| - // instead: |
1730 |
| - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); |
| 1724 | + logger.info(checksumType.toString()); |
| 1725 | + if (checksumType == null) { |
| 1726 | + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); |
| 1727 | + logger.log(Level.INFO, info); |
| 1728 | + throw new IOException(info); |
1731 | 1729 | }
|
1732 |
| - } catch (IOException ioex) { |
1733 |
| - in = null; |
1734 |
| - } |
1735 | 1730 |
|
1736 |
| - if (in == null) { |
1737 |
| - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); |
1738 |
| - logger.log(Level.INFO, info); |
1739 |
| - throw new IOException(info); |
1740 |
| - } |
| 1731 | + StorageIO<DataFile> storage = dataFile.getStorageIO(); |
| 1732 | + InputStream in = null; |
1741 | 1733 |
|
1742 |
| - String recalculatedChecksum = null; |
1743 |
| - try { |
1744 |
| - logger.info("Before calculating checksum"); |
1745 |
| - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); |
1746 |
| - logger.info("Checksum:" + recalculatedChecksum); |
1747 |
| - } catch (RuntimeException rte) { |
1748 |
| - recalculatedChecksum = null; |
1749 |
| - } finally { |
1750 |
| - IOUtils.closeQuietly(in); |
1751 |
| - } |
1752 |
| - |
1753 |
| - if (recalculatedChecksum == null) { |
1754 |
| - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); |
1755 |
| - logger.log(Level.INFO, info); |
1756 |
| - throw new IOException(info); |
1757 |
| - } |
1758 |
| - |
1759 |
| - // TODO? What should we do if the datafile does not have a non-null checksum? |
1760 |
| - // Should we fail, or should we assume that the recalculated checksum |
1761 |
| - // is correct, and populate the checksumValue field with it? |
1762 |
| - if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { |
1763 |
| - // There's one possible condition that is 100% recoverable and can |
1764 |
| - // be automatically fixed (issue #6660): |
1765 |
| - logger.info(dataFile.getChecksumValue()); |
1766 |
| - logger.info(recalculatedChecksum); |
1767 |
| - logger.info("Checksums are not equal"); |
1768 |
| - boolean fixed = false; |
1769 |
| - if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { |
1770 |
| - // try again, see if the .orig file happens to be there: |
1771 |
| - try { |
| 1734 | + try { |
| 1735 | + storage.open(DataAccessOption.READ_ACCESS); |
| 1736 | + |
| 1737 | + if (!dataFile.isTabularData()) { |
| 1738 | + logger.info("It is not tabular"); |
| 1739 | + in = storage.getInputStream(); |
| 1740 | + } else { |
| 1741 | + // if this is a tabular file, read the preserved original "auxiliary file" |
| 1742 | + // instead: |
1772 | 1743 | in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
|
1773 |
| - } catch (IOException ioex) { |
1774 |
| - in = null; |
1775 | 1744 | }
|
1776 |
| - if (in != null) { |
| 1745 | + } catch (IOException ioex) { |
| 1746 | + in = null; |
| 1747 | + } |
| 1748 | + |
| 1749 | + if (in == null) { |
| 1750 | + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); |
| 1751 | + logger.log(Level.INFO, info); |
| 1752 | + throw new IOException(info); |
| 1753 | + } |
| 1754 | + |
| 1755 | + try { |
| 1756 | + logger.info("Before calculating checksum"); |
| 1757 | + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); |
| 1758 | + logger.info("Checksum:" + recalculatedChecksum); |
| 1759 | + } catch (RuntimeException rte) { |
| 1760 | + recalculatedChecksum = null; |
| 1761 | + } finally { |
| 1762 | + IOUtils.closeQuietly(in); |
| 1763 | + } |
| 1764 | + |
| 1765 | + if (recalculatedChecksum == null) { |
| 1766 | + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); |
| 1767 | + logger.log(Level.INFO, info); |
| 1768 | + throw new IOException(info); |
| 1769 | + } |
| 1770 | + |
| 1771 | + // TODO? What should we do if the datafile does not have a non-null checksum? |
| 1772 | + // Should we fail, or should we assume that the recalculated checksum |
| 1773 | + // is correct, and populate the checksumValue field with it? |
| 1774 | + if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { |
| 1775 | + // There's one possible condition that is 100% recoverable and can |
| 1776 | + // be automatically fixed (issue #6660): |
| 1777 | + logger.info(dataFile.getChecksumValue()); |
| 1778 | + logger.info(recalculatedChecksum); |
| 1779 | + logger.info("Checksums are not equal"); |
| 1780 | + boolean fixed = false; |
| 1781 | + if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { |
| 1782 | + // try again, see if the .orig file happens to be there: |
1777 | 1783 | try {
|
1778 |
| - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); |
1779 |
| - } catch (RuntimeException rte) { |
1780 |
| - recalculatedChecksum = null; |
1781 |
| - } finally { |
1782 |
| - IOUtils.closeQuietly(in); |
| 1784 | + in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); |
| 1785 | + } catch (IOException ioex) { |
| 1786 | + in = null; |
1783 | 1787 | }
|
1784 |
| - // try again: |
1785 |
| - if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { |
1786 |
| - fixed = true; |
| 1788 | + if (in != null) { |
1787 | 1789 | try {
|
1788 |
| - storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); |
1789 |
| - } catch (IOException ioex) { |
1790 |
| - fixed = false; |
| 1790 | + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); |
| 1791 | + } catch (RuntimeException rte) { |
| 1792 | + recalculatedChecksum = null; |
| 1793 | + } finally { |
| 1794 | + IOUtils.closeQuietly(in); |
| 1795 | + } |
| 1796 | + // try again: |
| 1797 | + if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { |
| 1798 | + fixed = true; |
| 1799 | + try { |
| 1800 | + storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); |
| 1801 | + } catch (IOException ioex) { |
| 1802 | + fixed = false; |
| 1803 | + } |
1791 | 1804 | }
|
1792 | 1805 | }
|
1793 | 1806 | }
|
1794 |
| - } |
1795 |
| - |
1796 |
| - if (!fixed) { |
1797 |
| - logger.info("checksum cannot be fixed"); |
1798 |
| - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); |
1799 |
| - logger.log(Level.INFO, info); |
1800 |
| - throw new IOException(info); |
| 1807 | + |
| 1808 | + if (!fixed) { |
| 1809 | + logger.info("checksum cannot be fixed"); |
| 1810 | + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); |
| 1811 | + logger.log(Level.INFO, info); |
| 1812 | + throw new IOException(info); |
| 1813 | + } |
1801 | 1814 | }
|
1802 | 1815 | }
|
1803 |
| - |
1804 | 1816 | logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum});
|
| 1817 | + |
1805 | 1818 | }
|
1806 | 1819 |
|
1807 | 1820 | public static String getStorageIdentifierFromLocation(String location) {
|
|
0 commit comments