|
| 1 | +package htsjdk.samtools.cram; |
| 2 | + |
| 3 | +import htsjdk.HtsjdkTest; |
| 4 | +import htsjdk.beta.plugin.IOUtils; |
| 5 | +import htsjdk.io.HtsPath; |
| 6 | +import htsjdk.io.IOPath; |
| 7 | +import htsjdk.samtools.SAMRecord; |
| 8 | +import htsjdk.samtools.SamReader; |
| 9 | +import htsjdk.samtools.SamReaderFactory; |
| 10 | +import htsjdk.samtools.ValidationStringency; |
| 11 | +import htsjdk.samtools.cram.build.CramIO; |
| 12 | +import htsjdk.samtools.cram.common.CramVersions; |
| 13 | +import htsjdk.samtools.util.FileExtensions; |
| 14 | +import htsjdk.utils.SamtoolsTestUtils; |
| 15 | +import org.testng.Assert; |
| 16 | +import org.testng.annotations.DataProvider; |
| 17 | +import org.testng.annotations.Test; |
| 18 | + |
| 19 | +import java.io.IOException; |
| 20 | +import java.io.InputStream; |
| 21 | +import java.nio.file.Path; |
| 22 | +import java.util.Iterator; |
| 23 | + |
| 24 | +// A Command line program for large-scale Carrot round-trip CRAM tests, plus some tests for the command line app. |
| 25 | +public class CarrotTest extends HtsjdkTest { |
| 26 | + private static String CONVERT_USAGE = "convert reads_file reference samtools_view_params"; |
| 27 | + private static String COMPARE_USAGE = "compare reads_file1 reads_file2 reference"; |
| 28 | + final static int EXIT_CODE_SUCCESS = 0; |
| 29 | + final static int EXIT_CODE_DIFFS = 1; |
| 30 | + final static int EXIT_CODE_FAILURE = -1; |
| 31 | + |
| 32 | + // supported CLP commands - the string representation of these values are the actual command strings |
| 33 | + public enum CLPCommand { |
| 34 | + CONVERT, // convert an input to CRAM 3.1 using samtools |
| 35 | + COMPARE // compare two crams for fidelity |
| 36 | + }; |
| 37 | + |
| 38 | + // 1) convert to CRAM 3.1 using samtools: |
| 39 | + // inputs: |
| 40 | + // convert |
| 41 | + // input reads file (bam or cram) |
| 42 | + // output reads file |
| 43 | + // reference |
| 44 | + // samtools params |
| 45 | + // 2) compare: |
| 46 | + // inputs: |
| 47 | + // compare |
| 48 | + // reads file 1 (bam or cram) |
| 49 | + // reads file 2 (bam or cram) |
| 50 | + // reference |
| 51 | + public static void main(final String[] args) { |
| 52 | + final String helpUsage = String.format("Usage: \"%s\" or \"%s\"", CONVERT_USAGE, COMPARE_USAGE); |
| 53 | + |
| 54 | + if (args.length < 1 || |
| 55 | + (!args[0].toUpperCase().equals(CLPCommand.CONVERT.toString()) && |
| 56 | + !args[0].toUpperCase().equals(CLPCommand.COMPARE.toString()))) { |
| 57 | + System.err.println(helpUsage); |
| 58 | + System.exit(EXIT_CODE_FAILURE); |
| 59 | + } else { |
| 60 | + switch (CLPCommand.valueOf(args[0].toUpperCase())) { |
| 61 | + case CONVERT: |
| 62 | + if (args.length < 5) { |
| 63 | + System.err.println(String.format("Usage: %s", CONVERT_USAGE)); |
| 64 | + System.exit(EXIT_CODE_FAILURE); |
| 65 | + } else { |
| 66 | + System.out.println(String.format("Converting %s to %s using samtools with (%s).", args[1], args[2], args[4])); |
| 67 | + SamtoolsTestUtils.convertToCRAM( |
| 68 | + new HtsPath(args[1]), |
| 69 | + new HtsPath(args[2]), |
| 70 | + new HtsPath(args[3]), |
| 71 | + args[4]); |
| 72 | + System.out.println(String.format("Done converting %s to %s using samtools.", args[1], args[2])); |
| 73 | + } |
| 74 | + break; |
| 75 | + |
| 76 | + case COMPARE: |
| 77 | + if (args.length < 4) { |
| 78 | + System.err.println(String.format("Usage: %s", COMPARE_USAGE)); |
| 79 | + System.exit(EXIT_CODE_FAILURE); |
| 80 | + } else { |
| 81 | + System.out.println(String.format("Comparing %s with %s.", args[1], args[2])); |
| 82 | + doCRAMCompare( |
| 83 | + new HtsPath(args[1]).toPath(), |
| 84 | + new HtsPath(args[2]).toPath(), |
| 85 | + new HtsPath(args[3]).toPath()); |
| 86 | + System.out.println(String.format("Done comparing %s to %s.", args[1], args[2])); |
| 87 | + } |
| 88 | + break; |
| 89 | + |
| 90 | + default: |
| 91 | + System.err.println(helpUsage); |
| 92 | + System.exit(EXIT_CODE_FAILURE); |
| 93 | + } |
| 94 | + } |
| 95 | + } |
| 96 | + |
| 97 | + @DataProvider(name = "carrotCLPConvertTestCases") |
| 98 | + public Object[][] carrotCLPConvertTestCases() { |
| 99 | + return new Object[][] { |
| 100 | + { |
| 101 | + "src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram", |
| 102 | + "src/test/resources/htsjdk/samtools/reference/human_g1k_v37.20.21.fasta.gz", |
| 103 | + "--output-fmt cram,version=3.1,archive" |
| 104 | + } |
| 105 | + }; |
| 106 | + } |
| 107 | + |
| 108 | + @Test(dataProvider = "carrotCLPConvertTestCases") |
| 109 | + private void testCarrotCLPConvert( |
| 110 | + final String inputCRAM, |
| 111 | + final String referenceFASTA, |
| 112 | + final String samtoolsArgs) throws IOException { |
| 113 | + final IOPath tempCRAMPath = IOUtils.createTempPath("carrotTemporaryCRAM", FileExtensions.CRAM); |
| 114 | + CarrotTest.main(new String[] { |
| 115 | + CLPCommand.CONVERT.toString(), |
| 116 | + inputCRAM, |
| 117 | + tempCRAMPath.getRawInputString(), |
| 118 | + referenceFASTA, |
| 119 | + samtoolsArgs}); |
| 120 | + try (final InputStream inputStream = tempCRAMPath.getInputStream()) { |
| 121 | + Assert.assertTrue(CramIO.readCramHeader(inputStream).getCRAMVersion().equals(CramVersions.CRAM_v3_1)); |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + @DataProvider(name = "carrotCLPCompareTestCases") |
| 126 | + public Object[][] carrotCLPCompareTestCases() { |
| 127 | + return new Object[][] { |
| 128 | + { |
| 129 | + // no-op test case just to test the CLP - compare a file to itself |
| 130 | + "src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram", |
| 131 | + "src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram", |
| 132 | + "src/test/resources/htsjdk/samtools/reference/human_g1k_v37.20.21.fasta.gz", |
| 133 | + }, |
| 134 | + }; |
| 135 | + } |
| 136 | + |
| 137 | + @Test(dataProvider = "carrotCLPCompareTestCases") |
| 138 | + private void carrotCLPCompare( |
| 139 | + final String inputCRAM1, |
| 140 | + final String inputCRAM2, |
| 141 | + final String referenceFASTA) { |
| 142 | + CarrotTest.main(new String[] {CLPCommand.COMPARE.toString(), inputCRAM1, inputCRAM2, referenceFASTA}); |
| 143 | + } |
| 144 | + |
| 145 | + // return 0 if the files are equivalent, 1 if they are not |
| 146 | + public static int doCRAMCompare( |
| 147 | + final Path path1, |
| 148 | + final Path path2, |
| 149 | + final Path referencePath |
| 150 | + ) { |
| 151 | + int count = 0; |
| 152 | + int diffCount = 0; |
| 153 | + try (final SamReader reader1 = SamReaderFactory.makeDefault() |
| 154 | + .referenceSequence(referencePath) |
| 155 | + .validationStringency((ValidationStringency.SILENT)) |
| 156 | + .open(path1); |
| 157 | + final SamReader reader2 = SamReaderFactory.makeDefault() |
| 158 | + .referenceSequence(referencePath) |
| 159 | + .validationStringency((ValidationStringency.SILENT)) |
| 160 | + .open(path2); |
| 161 | + ) { |
| 162 | + final Iterator<SAMRecord> iterator2 = reader2.iterator(); |
| 163 | + for (final SAMRecord rec1 : reader1) { |
| 164 | + final SAMRecord rec2 = iterator2.next(); |
| 165 | + count++; |
| 166 | + if (!rec1.equals(rec2)) { |
| 167 | + diffCount++; |
| 168 | + if (diffCount < 100) { |
| 169 | + // for the first 100 pairs that differ, print out the reads for debugging |
| 170 | + System.out.println("Difference at read " + count); |
| 171 | + System.out.println("Read: " + rec1.getSAMString()); |
| 172 | + System.out.println("Read: " + rec2.getSAMString()); |
| 173 | + System.out.println(); |
| 174 | + } |
| 175 | + } |
| 176 | + if (count % 10000000 == 0) { |
| 177 | + System.out.println("Processed " + count + " records"); |
| 178 | + } |
| 179 | + } |
| 180 | + } catch (final IOException e) { |
| 181 | + throw new RuntimeException(e); |
| 182 | + } |
| 183 | + if (diffCount > 0) { |
| 184 | + System.out.println(String.format("%d reads differ", diffCount)); |
| 185 | + return EXIT_CODE_DIFFS; |
| 186 | + } else { |
| 187 | + System.out.println("No differences found!"); |
| 188 | + return EXIT_CODE_SUCCESS; |
| 189 | + } |
| 190 | + } |
| 191 | +} |
0 commit comments