Skip to content

Commit 1d8b082

Browse files
committed
Thread model context through CRAM write code.
1 parent 9167e78 commit 1d8b082

21 files changed

+54
-33
lines changed

src/main/java/htsjdk/samtools/cram/compression/BZIP2ExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
package htsjdk.samtools.cram.compression;
2626

2727
import htsjdk.samtools.cram.io.InputStreamUtils;
28+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
2829
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
2930
import htsjdk.samtools.util.IOUtil;
3031
import htsjdk.samtools.util.RuntimeIOException;
@@ -42,7 +43,7 @@ public BZIP2ExternalCompressor() {
4243
}
4344

4445
@Override
45-
public byte[] compress(final byte[] data) {
46+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
4647
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
4748
try (final BZip2CompressorOutputStream bos = new BZip2CompressorOutputStream(byteArrayOutputStream)) {
4849
IOUtil.copyStream(new ByteArrayInputStream(data), bos);

src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode;
1414
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode;
1515
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
16+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
1617
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
1718
import htsjdk.utils.ValidationUtils;
1819

@@ -26,7 +27,13 @@ protected ExternalCompressor(final BlockCompressionMethod method) {
2627
this.method = method;
2728
}
2829

29-
public abstract byte[] compress(byte[] data);
30+
/**
31+
* Compress the data using the codec-specific context model.
32+
* @param data the data to compress
33+
* @param contextModel the context model to use for compression; may be null
34+
* @return the compressed data
35+
*/
36+
public abstract byte[] compress(byte[] data, CRAMCodecModelContext contextModel);
3037

3138
public abstract byte[] uncompress(byte[] data);
3239

src/main/java/htsjdk/samtools/cram/compression/GZIPExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
import htsjdk.samtools.Defaults;
2828
import htsjdk.samtools.cram.io.InputStreamUtils;
29+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
2930
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
3031
import htsjdk.samtools.util.IOUtil;
3132
import htsjdk.samtools.util.RuntimeIOException;
@@ -62,7 +63,7 @@ public GZIPExternalCompressor(final int compressionLevel) {
6263
public int getWriteCompressionLevel() { return writeCompressionLevel; }
6364

6465
@Override
65-
public byte[] compress(final byte[] data) {
66+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
6667
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
6768
try (final GZIPOutputStream gos = new GZIPOutputStream(byteArrayOutputStream) {
6869
{

src/main/java/htsjdk/samtools/cram/compression/LZMAExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
package htsjdk.samtools.cram.compression;
2626

2727
import htsjdk.samtools.cram.io.InputStreamUtils;
28+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
2829
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
2930
import htsjdk.samtools.util.RuntimeIOException;
3031
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
@@ -41,7 +42,7 @@ public LZMAExternalCompressor() {
4142
}
4243

4344
@Override
44-
public byte[] compress(final byte[] data) {
45+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
4546
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(data.length * 2);
4647
try (final XZCompressorOutputStream xzCompressorOutputStream = new XZCompressorOutputStream(byteArrayOutputStream)) {
4748
xzCompressorOutputStream.write(data);

src/main/java/htsjdk/samtools/cram/compression/RANS4x8ExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode;
2929
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode;
3030
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params;
31+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
3132
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
3233

3334
import java.nio.ByteBuffer;
@@ -66,7 +67,7 @@ public RANS4x8ExternalCompressor(
6667
}
6768

6869
@Override
69-
public byte[] compress(final byte[] data) {
70+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
7071
final RANS4x8Params params = new RANS4x8Params(order);
7172
final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params);
7273
return toByteArray(buffer);

src/main/java/htsjdk/samtools/cram/compression/RANSNx16ExternalCompressor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
package htsjdk.samtools.cram.compression;
22

3-
import htsjdk.samtools.cram.compression.rans.RANSParams;
43
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode;
54
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
65
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
6+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
77
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
88

99
import java.nio.ByteBuffer;
@@ -38,7 +38,7 @@ public RANSNx16ExternalCompressor(
3838
}
3939

4040
@Override
41-
public byte[] compress(final byte[] data) {
41+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
4242
final RANSNx16Params params = new RANSNx16Params(flags);
4343
final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params);
4444
return toByteArray(buffer);

src/main/java/htsjdk/samtools/cram/compression/RAWExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
*/
2525
package htsjdk.samtools.cram.compression;
2626

27+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
2728
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
2829

2930
/**
@@ -36,7 +37,7 @@ public RAWExternalCompressor() {
3637
}
3738

3839
@Override
39-
public byte[] compress(final byte[] data) {
40+
public byte[] compress(final byte[] data, final CRAMCodecModelContext unused_contextModel) {
4041
return data;
4142
}
4243

src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompExternalCompressor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
import htsjdk.samtools.cram.compression.CompressionUtils;
44
import htsjdk.samtools.cram.compression.ExternalCompressor;
5+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
56
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
6-
import htsjdk.utils.ValidationUtils;
77

88
public class FQZCompExternalCompressor extends ExternalCompressor {
99

@@ -18,7 +18,7 @@ public FQZCompExternalCompressor(
1818
}
1919

2020
@Override
21-
public byte[] compress(byte[] data) {
21+
public byte[] compress(byte[] data, final CRAMCodecModelContext unused_contextModel) {
2222
throw new UnsupportedOperationException("FQZComp compression is not implemented");
2323
}
2424

src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokeniserExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import htsjdk.samtools.cram.compression.CompressionUtils;
44
import htsjdk.samtools.cram.compression.ExternalCompressor;
55

6+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
67
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
78

89
import java.nio.ByteBuffer;
@@ -21,7 +22,7 @@ public NameTokeniserExternalCompressor(
2122
}
2223

2324
@Override
24-
public byte[] compress(byte[] data) {
25+
public byte[] compress(byte[] data, final CRAMCodecModelContext unused_contextModel) {
2526

2627
// Arith coding is typically 1-5% smaller, but around 50-100% slower
2728
final ByteBuffer buffer = nameTokEncoder.compress(

src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar
7676
final byte[] rawBytes = new byte[inputBuffer.remaining()];
7777
inputBuffer.get(rawBytes, inBuffer.position(), inputBuffer.remaining());
7878
final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor();
79-
final byte[] extCompressedBytes = compressor.compress(rawBytes);
79+
final byte[] extCompressedBytes = compressor.compress(rawBytes, null);
8080
outBuffer.put(extCompressedBytes);
8181
outBuffer.limit(outBuffer.position());
8282
outBuffer.rewind(); // set position to 0

src/main/java/htsjdk/samtools/cram/compression/range/RangeExternalCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import htsjdk.samtools.cram.compression.CompressionUtils;
44
import htsjdk.samtools.cram.compression.ExternalCompressor;
5+
import htsjdk.samtools.cram.structure.CRAMCodecModelContext;
56
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
67

78
import java.nio.ByteBuffer;
@@ -29,7 +30,7 @@ public RangeExternalCompressor(
2930
}
3031

3132
@Override
32-
public byte[] compress(byte[] data) {
33+
public byte[] compress(byte[] data, final CRAMCodecModelContext unused_contextModel) {
3334
final RangeParams params = new RangeParams(formatFlags);
3435
final ByteBuffer buffer = rangeEncode.compress(CompressionUtils.wrap(data), params);
3536
return toByteArray(buffer);

src/main/java/htsjdk/samtools/cram/encoding/writer/CramRecordWriter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
/**
3131
* A writer that emits CRAMCompressionRecord into the various streams that represent a Slice's data series blocks.
32-
* This essentially acts as a bridge between CRAMCompressionRecord fields and the various various data series streams
32+
* This essentially acts as a bridge between CRAMCompressionRecord fields and the various data series streams
3333
* associated with a Slice. It is the inverse of CramRecordReader.
3434
*/
3535
public class CramRecordWriter {
@@ -137,13 +137,13 @@ public CramRecordWriter(final Slice slice)
137137
* @param initialAlignmentStart the alignmentStart of the enclosing {@link Slice}, for delta calculation
138138
* @return a {@link SliceBlocks} object
139139
*/
140-
public SliceBlocks writeToSliceBlocks(final List<CRAMCompressionRecord> records, final int initialAlignmentStart) {
140+
public SliceBlocks writeToSliceBlocks(final CRAMCodecModelContext contextModel, final List<CRAMCompressionRecord> records, final int initialAlignmentStart) {
141141
int prevAlignmentStart = initialAlignmentStart;
142142
for (final CRAMCompressionRecord record : records) {
143143
writeCRAMRecord(record, prevAlignmentStart);
144144
prevAlignmentStart = record.getAlignmentStart();
145145
}
146-
return sliceBlocksWriteStreams.flushStreamsToBlocks();
146+
return sliceBlocksWriteStreams.flushStreamsToBlocks(contextModel);
147147
}
148148

149149
/**
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package htsjdk.samtools.cram.structure;
2+
3+
/**
4+
* Placeholder for context model data/accumulators for use by CRAM 3.1 codec write implementations.
5+
*/
6+
public class CRAMCodecModelContext {
7+
}

src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,13 @@ public EncodingDescriptor getEncodingDescriptorForDataSeries(final DataSeries da
218218
* @param outputStream stream to compress
219219
* @return Block containing the compressed contents of the stream
220220
*/
221-
public Block createCompressedBlockForStream(final Integer contentId, final ByteArrayOutputStream outputStream) {
221+
public Block createCompressedBlockForStream(final CRAMCodecModelContext contextModel, final Integer contentId, final ByteArrayOutputStream outputStream) {
222222
final ExternalCompressor compressor = externalCompressors.get(contentId);
223223
final byte[] rawContent = outputStream.toByteArray();
224224
return Block.createExternalBlock(
225225
compressor.getMethod(),
226226
contentId,
227-
compressor.compress(rawContent),
227+
compressor.compress(rawContent, contextModel),
228228
rawContent.length);
229229
}
230230

@@ -284,17 +284,17 @@ public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRA
284284
final ExternalCompressor gzip = compressorCache.getCompressorForMethod(
285285
BlockCompressionMethod.GZIP,
286286
encodingStrategy.getGZIPCompressionLevel());
287-
final int gzipLen = gzip.compress(data).length;
287+
final int gzipLen = gzip.compress(data, null).length;
288288

289289
final ExternalCompressor rans0 = compressorCache.getCompressorForMethod(
290290
BlockCompressionMethod.RANS,
291291
RANS4x8Params.ORDER.ZERO.ordinal());
292-
final int rans0Len = rans0.compress(data).length;
292+
final int rans0Len = rans0.compress(data,null).length;
293293

294294
final ExternalCompressor rans1 = compressorCache.getCompressorForMethod(
295295
BlockCompressionMethod.RANS,
296296
RANS4x8Params.ORDER.ONE.ordinal());
297-
final int rans1Len = rans1.compress(data).length;
297+
final int rans1Len = rans1.compress(data, null).length;
298298

299299
// find the best of general purpose codecs:
300300
final int minLen = Math.min(gzipLen, Math.min(rans0Len, rans1Len));

src/main/java/htsjdk/samtools/cram/structure/Slice.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
import htsjdk.utils.ValidationUtils;
4343

4444
import java.io.*;
45-
import java.lang.reflect.Array;
4645
import java.math.BigInteger;
4746
import java.util.*;
4847
import java.util.stream.Collectors;
@@ -107,6 +106,7 @@ public class Slice {
107106
private int byteSizeOfSliceBlocks = UNINITIALIZED_INDEXING_PARAMETER;
108107
private int landmarkIndex = UNINITIALIZED_INDEXING_PARAMETER;
109108

109+
private final CRAMCodecModelContext contextModel = new CRAMCodecModelContext();
110110
/**
111111
* Create a slice by reading a serialized Slice from an input stream.
112112
*
@@ -245,7 +245,7 @@ public Slice(
245245
this.globalRecordCounter = globalRecordCounter;
246246

247247
final CramRecordWriter writer = new CramRecordWriter(this);
248-
sliceBlocks = writer.writeToSliceBlocks(records, alignmentContext.getAlignmentStart());
248+
sliceBlocks = writer.writeToSliceBlocks(contextModel, records, alignmentContext.getAlignmentStart());
249249

250250
// we can't calculate the number of blocks until after the record writer has written everything out
251251
nSliceBlocks = caclulateNumberOfBlocks();

src/main/java/htsjdk/samtools/cram/structure/SliceBlocksWriteStreams.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ public SliceBlocksWriteStreams(final CompressionHeader compressionHeader) {
7979
public ByteArrayOutputStream getExternalOutputStream(final Integer contentID) { return externalOutputStreams.get(contentID); }
8080

8181
/**
82-
* Compress and write each each stream to a corresponding Block (note that this does not write
82+
* Compress and write each stream to a corresponding Block (note that this does not write
8383
* the blocks themselves to a container output stream - that can't happen until the slice is aggregated
8484
* into a container).
8585
*/
86-
public SliceBlocks flushStreamsToBlocks() {
86+
public SliceBlocks flushStreamsToBlocks(final CRAMCodecModelContext contextModel) {
8787
closeAllStreams();
8888

8989
// core block is raw (no compression) and must be written first (prescribed by the spec)
@@ -94,7 +94,7 @@ public SliceBlocks flushStreamsToBlocks() {
9494
if (contentId.equals(Block.NO_CONTENT_ID)) {
9595
throw new CRAMException("A valid content ID is required. Given: " + contentId);
9696
}
97-
externalBlocks.add(compressionHeader.getEncodingMap().createCompressedBlockForStream(contentId, contentStream));
97+
externalBlocks.add(compressionHeader.getEncodingMap().createCompressedBlockForStream(contextModel, contentId, contentStream));
9898
});
9999

100100
return new SliceBlocks(coreBlock, externalBlocks);

src/main/java/htsjdk/samtools/cram/structure/block/Block.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public static Block createGZIPFileHeaderBlock(final byte[] rawContent) {
129129
return new Block(
130130
BlockCompressionMethod.GZIP,
131131
BlockContentType.FILE_HEADER, NO_CONTENT_ID,
132-
(new GZIPExternalCompressor()).compress(rawContent),
132+
(new GZIPExternalCompressor()).compress(rawContent, null),
133133
rawContent.length);
134134
}
135135

src/test/java/htsjdk/samtools/cram/compression/ExternalCompressionTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public void testCompressorsRoundTrip(
8585
final int compressorSpecificArg,
8686
final Class<ExternalCompressor> unused) {
8787
final ExternalCompressor compressor = ExternalCompressor.getCompressorForMethod(method, compressorSpecificArg);
88-
final byte [] compressed = compressor.compress(TEST_BYTES);
88+
final byte [] compressed = compressor.compress(TEST_BYTES, null);
8989
final byte [] restored = compressor.uncompress(compressed);
9090
Assert.assertEquals(TEST_BYTES, restored);
9191
}

src/test/java/htsjdk/samtools/cram/structure/SliceBlockWriteStreamTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public void testSliceBlocksWriteStreamsRoundTrip() throws IOException {
4141
}
4242

4343
// close the streams and write them to compressed slice blocks
44-
final SliceBlocks sliceBlocks = sliceBlocksWriteStreams.flushStreamsToBlocks();
44+
final SliceBlocks sliceBlocks = sliceBlocksWriteStreams.flushStreamsToBlocks(new CRAMCodecModelContext());
4545

4646
// now verify all the blocks in Slice
4747
final byte[] coreRoundTripContent = sliceBlocks.getCoreBlock().getUncompressedContent(new CompressorCache());

src/test/java/htsjdk/samtools/cram/structure/SliceBlocksTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ static SliceBlocks getSliceBlocksForAllDataSeries(
134134
compressionMethod,
135135
compressionMethod == BlockCompressionMethod.RANS ?
136136
1 :
137-
ExternalCompressor.NO_COMPRESSION_ARG).compress(embeddedRefBlockContent),
137+
ExternalCompressor.NO_COMPRESSION_ARG).compress(embeddedRefBlockContent, null),
138138
embeddedRefBlockContent.length));
139139

140140
// add one external block for each Data Series
@@ -146,7 +146,7 @@ static SliceBlocks getSliceBlocksForAllDataSeries(
146146
dataSeries.getExternalBlockContentId(),
147147
ExternalCompressor.getCompressorForMethod(
148148
compressionMethod,
149-
ExternalCompressor.NO_COMPRESSION_ARG).compress(uncompressedContent.getBytes()),
149+
ExternalCompressor.NO_COMPRESSION_ARG).compress(uncompressedContent.getBytes(), null),
150150
dataSeries.getCanonicalName().getBytes().length));
151151
expectedExternalContentStrings.put(dataSeries.getExternalBlockContentId(), uncompressedContent);
152152
}

src/test/java/htsjdk/samtools/cram/structure/block/BlockTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public static Object[][] rtProvider() {
6969
public void testFileHeaderBlockRoundTrips(final byte[] testData, final CRAMVersion cramVersion) throws IOException {
7070
final Block fhBlock = Block.createGZIPFileHeaderBlock(testData);
7171
final Block rtBlock = roundTrip(fhBlock, cramVersion);
72-
contentCheck(rtBlock, testData, (new GZIPExternalCompressor()).compress(testData));
72+
contentCheck(rtBlock, testData, (new GZIPExternalCompressor()).compress(testData, null));
7373
}
7474

7575
@Test(dataProvider = "RoundTripTest")
@@ -100,7 +100,7 @@ public void testExternalBlockRoundTrips() throws IOException {
100100
final int contentID = 5;
101101

102102
final byte[] uncompressedData = "A TEST STRING WITH REDUNDANCY AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA".getBytes();
103-
final byte[] compressedData = compressor.compress(uncompressedData);
103+
final byte[] compressedData = compressor.compress(uncompressedData,null);
104104

105105
final Block extBlock = Block.createExternalBlock(compressor.getMethod(), contentID, compressedData, uncompressedData.length);
106106

0 commit comments

Comments
 (0)