Skip to content

Commit e1679c1

Browse files
Merge pull request #2343 from giuseppe/convert-to-zstd-without-compression
chunked: allow conversion without zstd compression
2 parents b454479 + 87c6994 commit e1679c1

File tree

6 files changed

+187
-42
lines changed

6 files changed

+187
-42
lines changed

pkg/chunked/compression_linux.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,10 @@ func openTmpFileNoTmpFile(tmpDir string) (*os.File, error) {
185185
// Returns (manifest blob, parsed manifest, tar-split file or nil, manifest offset).
186186
// The opened tar-split file’s position is unspecified.
187187
// It may return an error matching ErrFallbackToOrdinaryLayerDownload / errFallbackCanConvert.
188-
func readZstdChunkedManifest(tmpDir string, blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) (_ []byte, _ *minimal.TOC, _ *os.File, _ int64, retErr error) {
188+
// The compressed parameter indicates whether the manifest and tar-split data are zstd-compressed
189+
// (true) or stored uncompressed (false). Uncompressed data is used only for an optimization to convert
190+
// a regular OCI layer to zstd:chunked when convert_images is set, and it is not used for distributed images.
191+
func readZstdChunkedManifest(tmpDir string, blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string, compressed bool) (_ []byte, _ *minimal.TOC, _ *os.File, _ int64, retErr error) {
189192
offsetMetadata := annotations[minimal.ManifestInfoKey]
190193
if offsetMetadata == "" {
191194
return nil, nil, nil, 0, fmt.Errorf("%q annotation missing", minimal.ManifestInfoKey)
@@ -261,7 +264,7 @@ func readZstdChunkedManifest(tmpDir string, blobStream ImageSourceSeekable, tocD
261264
return nil, nil, nil, 0, err
262265
}
263266

264-
decodedBlob, err := decodeAndValidateBlob(manifest, manifestLengthUncompressed, tocDigest.String())
267+
decodedBlob, err := decodeAndValidateBlob(manifest, manifestLengthUncompressed, tocDigest.String(), compressed)
265268
if err != nil {
266269
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing TOC: %w", err)
267270
}
@@ -288,7 +291,7 @@ func readZstdChunkedManifest(tmpDir string, blobStream ImageSourceSeekable, tocD
288291
decodedTarSplit.Close()
289292
}
290293
}()
291-
if err := decodeAndValidateBlobToStream(tarSplit, decodedTarSplit, toc.TarSplitDigest.String()); err != nil {
294+
if err := decodeAndValidateBlobToStream(tarSplit, decodedTarSplit, toc.TarSplitDigest.String(), compressed); err != nil {
292295
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
293296
}
294297
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
@@ -487,11 +490,15 @@ func validateBlob(blob []byte, expectedCompressedChecksum string) error {
487490
return nil
488491
}
489492

490-
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
493+
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string, compressed bool) ([]byte, error) {
491494
if err := validateBlob(blob, expectedCompressedChecksum); err != nil {
492495
return nil, err
493496
}
494497

498+
if !compressed {
499+
return blob, nil
500+
}
501+
495502
decoder, err := zstd.NewReader(nil)
496503
if err != nil {
497504
return nil, err
@@ -502,11 +509,16 @@ func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompr
502509
return decoder.DecodeAll(blob, b)
503510
}
504511

505-
func decodeAndValidateBlobToStream(blob []byte, w *os.File, expectedCompressedChecksum string) error {
512+
func decodeAndValidateBlobToStream(blob []byte, w *os.File, expectedCompressedChecksum string, compressed bool) error {
506513
if err := validateBlob(blob, expectedCompressedChecksum); err != nil {
507514
return err
508515
}
509516

517+
if !compressed {
518+
_, err := w.Write(blob)
519+
return err
520+
}
521+
510522
decoder, err := zstd.NewReader(bytes.NewReader(blob))
511523
if err != nil {
512524
return err

pkg/chunked/compressor/compressor.go

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"github.com/containers/storage/pkg/chunked/internal/minimal"
1313
"github.com/containers/storage/pkg/ioutils"
14-
"github.com/klauspost/compress/zstd"
1514
"github.com/opencontainers/go-digest"
1615
"github.com/vbatts/tar-split/archive/tar"
1716
"github.com/vbatts/tar-split/tar/asm"
@@ -202,15 +201,15 @@ type tarSplitData struct {
202201
compressed *bytes.Buffer
203202
digester digest.Digester
204203
uncompressedCounter *ioutils.WriteCounter
205-
zstd *zstd.Encoder
204+
zstd minimal.ZstdWriter
206205
packer storage.Packer
207206
}
208207

209-
func newTarSplitData(level int) (*tarSplitData, error) {
208+
func newTarSplitData(createZstdWriter minimal.CreateZstdWriterFunc) (*tarSplitData, error) {
210209
compressed := bytes.NewBuffer(nil)
211210
digester := digest.Canonical.Digester()
212211

213-
zstdWriter, err := minimal.ZstdWriterWithLevel(io.MultiWriter(compressed, digester.Hash()), level)
212+
zstdWriter, err := createZstdWriter(io.MultiWriter(compressed, digester.Hash()))
214213
if err != nil {
215214
return nil, err
216215
}
@@ -227,11 +226,11 @@ func newTarSplitData(level int) (*tarSplitData, error) {
227226
}, nil
228227
}
229228

230-
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
229+
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, createZstdWriter minimal.CreateZstdWriterFunc) error {
231230
// total written so far. Used to retrieve partial offsets in the file
232231
dest := ioutils.NewWriteCounter(destFile)
233232

234-
tarSplitData, err := newTarSplitData(level)
233+
tarSplitData, err := newTarSplitData(createZstdWriter)
235234
if err != nil {
236235
return err
237236
}
@@ -251,7 +250,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
251250

252251
buf := make([]byte, 4096)
253252

254-
zstdWriter, err := minimal.ZstdWriterWithLevel(dest, level)
253+
zstdWriter, err := createZstdWriter(dest)
255254
if err != nil {
256255
return err
257256
}
@@ -404,18 +403,11 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
404403
return err
405404
}
406405

407-
if err := zstdWriter.Flush(); err != nil {
408-
zstdWriter.Close()
409-
return err
410-
}
411406
if err := zstdWriter.Close(); err != nil {
412407
return err
413408
}
414409
zstdWriter = nil
415410

416-
if err := tarSplitData.zstd.Flush(); err != nil {
417-
return err
418-
}
419411
if err := tarSplitData.zstd.Close(); err != nil {
420412
return err
421413
}
@@ -427,7 +419,7 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
427419
UncompressedSize: tarSplitData.uncompressedCounter.Count,
428420
}
429421

430-
return minimal.WriteZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), &ts, metadata, level)
422+
return minimal.WriteZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), &ts, metadata, createZstdWriter)
431423
}
432424

433425
type zstdChunkedWriter struct {
@@ -454,7 +446,7 @@ func (w zstdChunkedWriter) Write(p []byte) (int, error) {
454446
}
455447
}
456448

457-
// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is
449+
// makeZstdChunkedWriter writes a zstd compressed tarball where each file is
458450
// compressed separately so it can be addressed separately. Idea based on CRFS:
459451
// https://github.com/google/crfs
460452
// The difference with CRFS is that the zstd compression is used instead of gzip.
@@ -469,12 +461,12 @@ func (w zstdChunkedWriter) Write(p []byte) (int, error) {
469461
// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST]
470462
// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER]
471463
// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format.
472-
func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) {
464+
func makeZstdChunkedWriter(out io.Writer, metadata map[string]string, createZstdWriter minimal.CreateZstdWriterFunc) (io.WriteCloser, error) {
473465
ch := make(chan error, 1)
474466
r, w := io.Pipe()
475467

476468
go func() {
477-
ch <- writeZstdChunkedStream(out, metadata, r, level)
469+
ch <- writeZstdChunkedStream(out, metadata, r, createZstdWriter)
478470
_, _ = io.Copy(io.Discard, r) // Ordinarily writeZstdChunkedStream consumes all of r. If it fails, ensure the write end never blocks and eventually terminates.
479471
r.Close()
480472
close(ch)
@@ -493,5 +485,40 @@ func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.Wri
493485
level = &l
494486
}
495487

496-
return zstdChunkedWriterWithLevel(r, metadata, *level)
488+
createZstdWriter := func(dest io.Writer) (minimal.ZstdWriter, error) {
489+
return minimal.ZstdWriterWithLevel(dest, *level)
490+
}
491+
492+
return makeZstdChunkedWriter(r, metadata, createZstdWriter)
493+
}
494+
495+
// noCompression is a pass-through writer: every byte handed to Write is
// forwarded unchanged to dest. It offers the same Write/Close/Flush/Reset
// methods that the compressing writers in this package are used through.
type noCompression struct {
	dest io.Writer // current destination; replaced by Reset
}

// Write forwards p to the current destination and reports that writer's result.
func (nc *noCompression) Write(p []byte) (int, error) {
	written, err := nc.dest.Write(p)
	return written, err
}

// Close does nothing and always succeeds; the destination is left untouched.
func (nc *noCompression) Close() error { return nil }

// Flush does nothing and always succeeds; no data is held back by this type.
func (nc *noCompression) Flush() error { return nil }

// Reset makes all subsequent writes go to dest.
func (nc *noCompression) Reset(dest io.Writer) { nc.dest = dest }
514+
515+
// NoCompression writes directly to the output file without any compression
516+
//
517+
// Such an output does not follow the zstd:chunked spec and cannot be generally consumed; this function
518+
// only exists for internal purposes and should not be called from outside c/storage.
519+
func NoCompression(r io.Writer, metadata map[string]string) (io.WriteCloser, error) {
520+
createZstdWriter := func(dest io.Writer) (minimal.ZstdWriter, error) {
521+
return &noCompression{dest: dest}, nil
522+
}
523+
return makeZstdChunkedWriter(r, metadata, createZstdWriter)
497524
}

pkg/chunked/compressor/compressor_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ package compressor
33
import (
44
"bufio"
55
"bytes"
6+
"errors"
67
"io"
78
"testing"
9+
10+
"github.com/stretchr/testify/assert"
811
)
912

1013
func TestHole(t *testing.T) {
@@ -88,3 +91,82 @@ func TestTwoHoles(t *testing.T) {
8891
t.Error("didn't receive EOF")
8992
}
9093
}
94+
95+
func TestNoCompressionWrite(t *testing.T) {
96+
var buf bytes.Buffer
97+
nc := &noCompression{dest: &buf}
98+
99+
data := []byte("hello world")
100+
n, err := nc.Write(data)
101+
assert.NoError(t, err)
102+
assert.Equal(t, len(data), n)
103+
assert.Equal(t, data, buf.Bytes())
104+
105+
data2 := []byte(" again")
106+
n, err = nc.Write(data2)
107+
assert.NoError(t, err)
108+
assert.Equal(t, len(data2), n)
109+
assert.Equal(t, append(data, data2...), buf.Bytes())
110+
}
111+
112+
func TestNoCompressionClose(t *testing.T) {
113+
var buf bytes.Buffer
114+
nc := &noCompression{dest: &buf}
115+
err := nc.Close()
116+
assert.NoError(t, err)
117+
}
118+
119+
func TestNoCompressionFlush(t *testing.T) {
120+
var buf bytes.Buffer
121+
nc := &noCompression{dest: &buf}
122+
err := nc.Flush()
123+
assert.NoError(t, err)
124+
}
125+
126+
func TestNoCompressionReset(t *testing.T) {
127+
var buf1 bytes.Buffer
128+
nc := &noCompression{dest: &buf1}
129+
130+
data1 := []byte("initial data")
131+
_, err := nc.Write(data1)
132+
assert.NoError(t, err)
133+
assert.Equal(t, data1, buf1.Bytes())
134+
135+
err = nc.Close()
136+
assert.NoError(t, err)
137+
138+
var buf2 bytes.Buffer
139+
nc.Reset(&buf2)
140+
141+
data2 := []byte("new data")
142+
_, err = nc.Write(data2)
143+
assert.NoError(t, err)
144+
145+
assert.Equal(t, data1, buf1.Bytes(), "Buffer 1 should remain unchanged")
146+
assert.Equal(t, data2, buf2.Bytes(), "Buffer 2 should contain the new data")
147+
148+
err = nc.Close()
149+
assert.NoError(t, err)
150+
151+
// Test Reset with nil, though Write would panic, Reset itself should work
152+
nc.Reset(nil)
153+
assert.Nil(t, nc.dest)
154+
}
155+
156+
// errorWriter is a test double whose Write always fails without consuming
// any input.
type errorWriter struct{}

// Write ignores its argument and reports zero bytes written together with a
// fixed "mock write error".
func (*errorWriter) Write([]byte) (int, error) {
	failure := errors.New("mock write error")
	return 0, failure
}
162+
163+
func TestNoCompressionWriteError(t *testing.T) {
164+
ew := &errorWriter{}
165+
nc := &noCompression{dest: ew}
166+
167+
data := []byte("hello world")
168+
n, err := nc.Write(data)
169+
assert.Error(t, err)
170+
assert.Equal(t, 0, n)
171+
assert.Equal(t, "mock write error", err.Error())
172+
}

pkg/chunked/internal/minimal/compression.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ import (
2020
"github.com/vbatts/tar-split/archive/tar"
2121
)
2222

23+
// ZstdWriter is the writer abstraction used for every stream emitted by the
// chunked writer: a standard write-and-close writer that can additionally be
// pointed at a new destination.
type ZstdWriter interface {
	io.Writer
	io.Closer

	// Reset lets the writer be reused with dest as its new destination.
	Reset(dest io.Writer)
}
28+
29+
// CreateZstdWriterFunc is a factory that returns a ZstdWriter writing to dest, or an error if the writer cannot be created.
30+
type CreateZstdWriterFunc func(dest io.Writer) (ZstdWriter, error)
31+
2332
// TOC is short for Table of Contents and is used by the zstd:chunked
2433
// file format to effectively add an overall index into the contents
2534
// of a tarball; it also includes file metadata.
@@ -179,7 +188,7 @@ type TarSplitData struct {
179188
UncompressedSize int64
180189
}
181190

182-
func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, tarSplitData *TarSplitData, metadata []FileMetadata, level int) error {
191+
func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, tarSplitData *TarSplitData, metadata []FileMetadata, createZstdWriter CreateZstdWriterFunc) error {
183192
// 8 is the size of the zstd skippable frame header + the frame size
184193
const zstdSkippableFrameHeader = 8
185194
manifestOffset := offset + zstdSkippableFrameHeader
@@ -198,7 +207,7 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
198207
}
199208

200209
var compressedBuffer bytes.Buffer
201-
zstdWriter, err := ZstdWriterWithLevel(&compressedBuffer, level)
210+
zstdWriter, err := createZstdWriter(&compressedBuffer)
202211
if err != nil {
203212
return err
204213
}
@@ -244,7 +253,7 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
244253
return appendZstdSkippableFrame(dest, manifestDataLE)
245254
}
246255

247-
func ZstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) {
256+
// ZstdWriterWithLevel returns a zstd encoder that writes to dest, configured
// with the encoder level corresponding to the given zstd compression level.
//
// On failure the returned ZstdWriter is nil and the error from the zstd
// package is returned as-is.
func ZstdWriterWithLevel(dest io.Writer, level int) (ZstdWriter, error) {
	el := zstd.EncoderLevelFromZstd(level)
	encoder, err := zstd.NewWriter(dest, zstd.WithEncoderLevel(el))
	if err != nil {
		// Return an untyped nil: forwarding the nil *zstd.Encoder straight into
		// the interface result would yield a non-nil ZstdWriter wrapping a nil
		// pointer, which defeats any "!= nil" check made by a caller.
		return nil, err
	}
	return encoder, nil
}

0 commit comments

Comments
 (0)