Skip to content

Commit f012878

Browse files
committed
Wire up CRAM 3.1 codecs for reading.
1 parent 7e26198 commit f012878

27 files changed

+809
-114
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package htsjdk.beta.codecs.reads.cram.cramV3_1;
2+
3+
import htsjdk.beta.codecs.reads.cram.CRAMCodec;
4+
import htsjdk.beta.codecs.reads.cram.CRAMDecoder;
5+
import htsjdk.beta.codecs.reads.cram.CRAMEncoder;
6+
import htsjdk.beta.exception.HtsjdkIOException;
7+
import htsjdk.beta.io.bundle.Bundle;
8+
import htsjdk.beta.io.bundle.SignatureStream;
9+
import htsjdk.beta.plugin.HtsVersion;
10+
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
11+
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
12+
import htsjdk.samtools.cram.structure.CramHeader;
13+
14+
import java.io.IOException;
15+
import java.util.Arrays;
16+
17+
/**
18+
* CRAM v3.1 codec
19+
*/
20+
public class CRAMCodecV3_1 extends CRAMCodec {
21+
public static final HtsVersion VERSION_3_1 = new HtsVersion(3, 0, 1);
22+
private static final String CRAM_MAGIC_3_1 = new String(CramHeader.MAGIC) + "\3\1";
23+
24+
@Override
25+
public HtsVersion getVersion() {
26+
return VERSION_3_1;
27+
}
28+
29+
@Override
30+
public int getSignatureLength() {
31+
return CRAM_MAGIC_3_1.length();
32+
}
33+
34+
@Override
35+
public boolean canDecodeSignature(final SignatureStream signatureStream, final String sourceName) {
36+
try {
37+
final byte[] signatureBytes = new byte[getSignatureLength()];
38+
final int numRead = signatureStream.read(signatureBytes);
39+
if (numRead < getSignatureLength()) {
40+
throw new HtsjdkIOException(String.format("Failure reading content from stream for %s", sourceName));
41+
}
42+
return Arrays.equals(signatureBytes, getSignatureString().getBytes());
43+
} catch (IOException e) {
44+
throw new HtsjdkIOException(String.format("Failure reading content from stream for %s", sourceName));
45+
}
46+
}
47+
48+
@Override
49+
public CRAMDecoder getDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) {
50+
return new CRAMDecoderV3_1(inputBundle, readsDecoderOptions);
51+
}
52+
53+
@Override
54+
public CRAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) {
55+
return new CRAMEncoderV3_1(outputBundle, readsEncoderOptions);
56+
}
57+
58+
@Override
59+
protected String getSignatureString() { return CRAM_MAGIC_3_1; }
60+
61+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package htsjdk.beta.codecs.reads.cram.cramV3_1;
2+
3+
import htsjdk.beta.codecs.reads.cram.CRAMDecoder;
4+
import htsjdk.beta.io.bundle.Bundle;
5+
import htsjdk.beta.io.bundle.BundleResourceType;
6+
import htsjdk.beta.plugin.HtsVersion;
7+
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
8+
9+
/**
10+
* CRAM v3.1 decoder.
11+
*/
12+
public class CRAMDecoderV3_1 extends CRAMDecoder {
13+
14+
/**
15+
* Create a new CRAM V3.1 decoder. The primary resource in the input
16+
* bundle must have content type {@link BundleResourceType#CT_ALIGNED_READS} (to find a decoder for a bundle,
17+
* see {@link htsjdk.beta.plugin.registry.ReadsResolver}).
18+
*
19+
* @param bundle input {@link Bundle} to decode
20+
* @param readsDecoderOptions {@link ReadsDecoderOptions} to use
21+
*/
22+
public CRAMDecoderV3_1(final Bundle bundle, final ReadsDecoderOptions readsDecoderOptions) {
23+
super(bundle, readsDecoderOptions);
24+
}
25+
26+
@Override
27+
public HtsVersion getVersion() {
28+
return CRAMCodecV3_1.VERSION_3_1;
29+
}
30+
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package htsjdk.beta.codecs.reads.cram.cramV3_1;
2+
3+
import htsjdk.beta.codecs.reads.cram.CRAMEncoder;
4+
import htsjdk.beta.io.bundle.Bundle;
5+
import htsjdk.beta.io.bundle.BundleResourceType;
6+
import htsjdk.beta.plugin.HtsVersion;
7+
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
8+
9+
/**
10+
* CRAM v3.1 encoder.
11+
*/
12+
public class CRAMEncoderV3_1 extends CRAMEncoder {
13+
14+
/**
15+
* Create a new CRAM v3.1 encoder for the given output bundle. The primary resource in the
16+
* bundle must have content type {@link BundleResourceType#CT_ALIGNED_READS} (to find an encoder for a bundle,
17+
* see {@link htsjdk.beta.plugin.registry.ReadsResolver}).
18+
*
19+
* @param outputBundle output {@link Bundle} to encode
20+
* @param readsEncoderOptions {@link ReadsEncoderOptions} to use
21+
*/
22+
public CRAMEncoderV3_1(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) {
23+
super(outputBundle, readsEncoderOptions);
24+
}
25+
26+
@Override
27+
public HtsVersion getVersion() {
28+
return CRAMCodecV3_1.VERSION_3_1;
29+
}
30+
31+
}

src/main/java/htsjdk/samtools/cram/common/CramVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
public final class CramVersions {
77
public static final CRAMVersion CRAM_v2_1 = new CRAMVersion(2, 1);
88
public static final CRAMVersion CRAM_v3 = new CRAMVersion(3, 0);
9+
public static final CRAMVersion CRAM_v3_1 = new CRAMVersion(3, 1);
910

1011
final static Set<CRAMVersion> supportedCRAMVersions = new HashSet<CRAMVersion>() {{
1112
add(CRAM_v2_1);
1213
add(CRAM_v3);
14+
add(CRAM_v3_1);
1315
}};
1416

1517
/**

src/main/java/htsjdk/samtools/cram/compression/ExternalCompressor.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
package htsjdk.samtools.cram.compression;
22

3+
import htsjdk.samtools.cram.compression.fqzcomp.FQZCompDecode;
4+
import htsjdk.samtools.cram.compression.fqzcomp.FQZCompEncode;
5+
import htsjdk.samtools.cram.compression.fqzcomp.FQZCompExternalCompressor;
6+
import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationDecode;
7+
import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationEncode;
8+
import htsjdk.samtools.cram.compression.nametokenisation.NameTokeniserExternalCompressor;
39
import htsjdk.samtools.cram.compression.range.RangeDecode;
410
import htsjdk.samtools.cram.compression.range.RangeEncode;
11+
import htsjdk.samtools.cram.compression.range.RangeExternalCompressor;
512
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode;
613
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode;
14+
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode;
15+
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
716
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
817
import htsjdk.utils.ValidationUtils;
918

@@ -74,14 +83,25 @@ public static ExternalCompressor getCompressorForMethod(
7483

7584
case RANS:
7685
return compressorSpecificArg == NO_COMPRESSION_ARG ?
77-
new RANSExternalCompressor(new RANS4x8Encode(), new RANS4x8Decode()) :
78-
new RANSExternalCompressor(compressorSpecificArg, new RANS4x8Encode(), new RANS4x8Decode());
86+
new RANS4x8ExternalCompressor(new RANS4x8Encode(), new RANS4x8Decode()) :
87+
new RANS4x8ExternalCompressor(compressorSpecificArg, new RANS4x8Encode(), new RANS4x8Decode());
7988

80-
case RANGE:
89+
case RANSNx16:
90+
return compressorSpecificArg == NO_COMPRESSION_ARG ?
91+
new RANSNx16ExternalCompressor(new RANSNx16Encode(), new RANSNx16Decode()) :
92+
new RANSNx16ExternalCompressor(compressorSpecificArg, new RANSNx16Encode(), new RANSNx16Decode());
93+
94+
case ADAPTIVE_ARITHMETIC:
8195
return compressorSpecificArg == NO_COMPRESSION_ARG ?
8296
new RangeExternalCompressor(new RangeEncode(), new RangeDecode()) :
8397
new RangeExternalCompressor(compressorSpecificArg, new RangeEncode(), new RangeDecode());
8498

99+
case NAME_TOKENISER:
100+
return new NameTokeniserExternalCompressor(new NameTokenisationEncode(), new NameTokenisationDecode());
101+
102+
case FQZCOMP:
103+
return new FQZCompExternalCompressor(new FQZCompEncode(), new FQZCompDecode());
104+
85105
case BZIP2:
86106
ValidationUtils.validateArg(
87107
compressorSpecificArg == NO_COMPRESSION_ARG,

src/main/java/htsjdk/samtools/cram/compression/RANSExternalCompressor.java renamed to src/main/java/htsjdk/samtools/cram/compression/RANS4x8ExternalCompressor.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import java.nio.ByteBuffer;
3434
import java.util.Objects;
3535

36-
public final class RANSExternalCompressor extends ExternalCompressor {
36+
public final class RANS4x8ExternalCompressor extends ExternalCompressor {
3737
private final RANSParams.ORDER order;
3838
private final RANS4x8Encode ransEncode;
3939
private final RANS4x8Decode ransDecode;
@@ -42,20 +42,20 @@ public final class RANSExternalCompressor extends ExternalCompressor {
4242
* We use a shared RANS instance for all compressors.
4343
* @param rans
4444
*/
45-
public RANSExternalCompressor(
45+
public RANS4x8ExternalCompressor(
4646
final RANS4x8Encode ransEncode,
4747
final RANS4x8Decode ransDecode) {
4848
this(RANSParams.ORDER.ZERO, ransEncode, ransDecode);
4949
}
5050

51-
public RANSExternalCompressor(
51+
public RANS4x8ExternalCompressor(
5252
final int order,
5353
final RANS4x8Encode ransEncode,
5454
final RANS4x8Decode ransDecode) {
5555
this(RANSParams.ORDER.fromInt(order), ransEncode, ransDecode);
5656
}
5757

58-
public RANSExternalCompressor(
58+
public RANS4x8ExternalCompressor(
5959
final RANSParams.ORDER order,
6060
final RANS4x8Encode ransEncode,
6161
final RANS4x8Decode ransDecode) {
@@ -88,7 +88,7 @@ public boolean equals(Object o) {
8888
if (this == o) return true;
8989
if (o == null || getClass() != o.getClass()) return false;
9090

91-
RANSExternalCompressor that = (RANSExternalCompressor) o;
91+
RANS4x8ExternalCompressor that = (RANS4x8ExternalCompressor) o;
9292

9393
return this.order == that.order;
9494
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package htsjdk.samtools.cram.compression;
2+
3+
import htsjdk.samtools.cram.compression.rans.RANSParams;
4+
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode;
5+
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
6+
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
7+
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
8+
9+
import java.nio.ByteBuffer;
10+
import java.util.Objects;
11+
12+
public final class RANSNx16ExternalCompressor extends ExternalCompressor {
13+
private final int flags;
14+
private final RANSNx16Encode ransEncode;
15+
private final RANSNx16Decode ransDecode;
16+
17+
/**
18+
* We use a shared RANS instance for all compressors.
19+
*
20+
* @param ransEncode
21+
* @param ransDecode
22+
*/
23+
public RANSNx16ExternalCompressor(
24+
final RANSNx16Encode ransEncode,
25+
final RANSNx16Decode ransDecode) {
26+
//TODO: fix this constructor call to use strongly typed flag value
27+
this(0, ransEncode, ransDecode);
28+
}
29+
30+
public RANSNx16ExternalCompressor(
31+
final int flags,
32+
final RANSNx16Encode ransEncode,
33+
final RANSNx16Decode ransDecode) {
34+
super(BlockCompressionMethod.RANS);
35+
this.ransEncode = ransEncode;
36+
this.ransDecode = ransDecode;
37+
this.flags = flags;
38+
}
39+
40+
@Override
41+
public byte[] compress(final byte[] data) {
42+
final RANSNx16Params params = new RANSNx16Params(flags);
43+
final ByteBuffer buffer = ransEncode.compress(CompressionUtils.wrap(data), params);
44+
return toByteArray(buffer);
45+
}
46+
47+
@Override
48+
public byte[] uncompress(byte[] data) {
49+
final ByteBuffer buf = ransDecode.uncompress(CompressionUtils.wrap(data));
50+
return toByteArray(buf);
51+
}
52+
53+
@Override
54+
public String toString() {
55+
return String.format("%s(%x)", this.getMethod(), flags);
56+
}
57+
58+
@Override
59+
public boolean equals(Object o) {
60+
if (this == o) return true;
61+
if (o == null || getClass() != o.getClass()) return false;
62+
63+
RANSNx16ExternalCompressor that = (RANSNx16ExternalCompressor) o;
64+
65+
return this.flags == that.flags;
66+
}
67+
68+
@Override
69+
public int hashCode() {
70+
return Objects.hash(getMethod(), flags);
71+
}
72+
73+
private byte[] toByteArray(final ByteBuffer buffer) {
74+
if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.array().length == buffer.limit()) {
75+
return buffer.array();
76+
}
77+
78+
final byte[] bytes = new byte[buffer.remaining()];
79+
buffer.get(bytes);
80+
return bytes;
81+
}
82+
83+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package htsjdk.samtools.cram.compression.fqzcomp;
2+
3+
import htsjdk.samtools.cram.compression.CompressionUtils;
4+
import htsjdk.samtools.cram.compression.ExternalCompressor;
5+
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
6+
import htsjdk.utils.ValidationUtils;
7+
8+
public class FQZCompExternalCompressor extends ExternalCompressor {
9+
10+
// TODO: this codec is not yet full implemented; it is decode only
11+
private final FQZCompDecode fqzCompDecoder;
12+
13+
public FQZCompExternalCompressor(
14+
final FQZCompEncode unused_fqzCompEncoder,
15+
final FQZCompDecode fqzCompDecoder) {
16+
super(BlockCompressionMethod.FQZCOMP);
17+
this.fqzCompDecoder = fqzCompDecoder;
18+
}
19+
20+
@Override
21+
public byte[] compress(byte[] data) {
22+
throw new UnsupportedOperationException("FQZComp compression is not implemented");
23+
}
24+
25+
@Override
26+
public byte[] uncompress(byte[] data) {
27+
return fqzCompDecoder.uncompress(CompressionUtils.wrap(data)).array();
28+
}
29+
30+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package htsjdk.samtools.cram.compression.nametokenisation;
2+
3+
import htsjdk.samtools.cram.compression.CompressionUtils;
4+
import htsjdk.samtools.cram.compression.ExternalCompressor;
5+
6+
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
7+
8+
import java.nio.ByteBuffer;
9+
10+
public class NameTokeniserExternalCompressor extends ExternalCompressor {
11+
12+
private final NameTokenisationEncode nameTokEncoder;
13+
private final NameTokenisationDecode nameTokDecoder;
14+
15+
public NameTokeniserExternalCompressor(
16+
final NameTokenisationEncode nameTokEncoder,
17+
final NameTokenisationDecode nameTokDecoder) {
18+
super(BlockCompressionMethod.NAME_TOKENISER);
19+
this.nameTokEncoder = nameTokEncoder;
20+
this.nameTokDecoder = nameTokDecoder;
21+
}
22+
23+
@Override
24+
public byte[] compress(byte[] data) {
25+
//TODO: this method needs a useArith flag; for now use false
26+
27+
// Arith coding is typically 1-5% smaller, but around 50-100% slower
28+
final ByteBuffer buffer = nameTokEncoder.compress(
29+
CompressionUtils.wrap(data),
30+
false,
31+
NameTokenisationDecode.NAME_SEPARATOR);
32+
return buffer.array();
33+
}
34+
35+
@Override
36+
public byte[] uncompress(byte[] data) {
37+
return nameTokDecoder.uncompress(CompressionUtils.wrap(data),
38+
NameTokenisationDecode.NAME_SEPARATOR);
39+
}
40+
41+
}

0 commit comments

Comments
 (0)