Skip to content

Commit 0c8d91e

Browse files
committed
feat: Add archive extraction support for http(s)
This enables archive extraction for the zip format and the gz/tar/tar.gz formats. If a user provides an http(s) link to one of these formats, pullman will now automatically extract the contents into the destination directory. Signed-off-by: Paul Van Eck <[email protected]>
1 parent 473fff4 commit 0c8d91e

File tree

3 files changed

+425
-1
lines changed

3 files changed

+425
-1
lines changed

pullman/helpers.go

+191
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,61 @@
1414
package pullman
1515

1616
import (
17+
"archive/tar"
18+
"archive/zip"
19+
"bufio"
20+
"bytes"
21+
"compress/gzip"
1722
"fmt"
1823
"hash/fnv"
24+
"io"
1925
"os"
2026
"path/filepath"
27+
"strings"
2128
)
2229

30+
// FileFormat describes how one supported file format is recognised from the
// raw bytes of a file.
type FileFormat struct {
	// MagicBytes is the signature that must be present in the file.
	MagicBytes []byte
	// Offset is the zero-based position in the file at which MagicBytes starts.
	Offset int
	// Extension is the short name of the format without a leading dot
	// ("tar", "gz" or "zip" for the formats registered in this package).
	Extension string
}
35+
36+
// Magic byte values pulled from: https://en.wikipedia.org/wiki/List_of_file_signatures
37+
var fileFormats = []FileFormat{
38+
{
39+
MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30},
40+
Offset: 257,
41+
Extension: "tar",
42+
},
43+
{
44+
MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00},
45+
Offset: 257,
46+
Extension: "tar",
47+
},
48+
{
49+
MagicBytes: []byte{0x1F, 0x8B},
50+
Offset: 0,
51+
Extension: "gz",
52+
},
53+
{
54+
MagicBytes: []byte{0x50, 0x4B, 0x03, 0x04},
55+
Offset: 0,
56+
Extension: "zip",
57+
},
58+
59+
{
60+
MagicBytes: []byte{0x50, 0x4B, 0x05, 0x06},
61+
Offset: 0,
62+
Extension: "zip",
63+
},
64+
65+
{
66+
MagicBytes: []byte{0x50, 0x4B, 0x07, 0x08},
67+
Offset: 0,
68+
Extension: "zip",
69+
},
70+
}
71+
2372
// OpenFile will check the path and the filesystem for mismatch errors
2473
func OpenFile(path string) (*os.File, error) {
2574
// resource paths need to be compatible with a local filesystem download
@@ -57,3 +106,145 @@ func HashStrings(strings ...string) string {
57106

58107
return fmt.Sprintf("%#x", h.Sum64())
59108
}
109+
110+
// Extract a zip file into the provided destination directory.
111+
func ExtractZip(filePath string, dest string) error {
112+
zipReader, err := zip.OpenReader(filePath)
113+
if err != nil {
114+
return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
115+
}
116+
defer zipReader.Close()
117+
118+
prefix := filepath.Clean(dest) + string(os.PathSeparator)
119+
for _, zipFileEntry := range zipReader.File {
120+
destFilePath := filepath.Join(dest, zipFileEntry.Name)
121+
122+
// Zip slip vulnerability check
123+
if !strings.HasPrefix(destFilePath, prefix) {
124+
return fmt.Errorf("%s: illegal file path", destFilePath)
125+
}
126+
127+
if zipFileEntry.FileInfo().IsDir() {
128+
err = os.MkdirAll(destFilePath, 0755)
129+
if err != nil {
130+
return fmt.Errorf("error creating new directory %s", destFilePath)
131+
}
132+
continue
133+
}
134+
135+
file, fileErr := OpenFile(destFilePath)
136+
if fileErr != nil {
137+
return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
138+
}
139+
defer file.Close()
140+
141+
zippedRc, err := zipFileEntry.Open()
142+
if err != nil {
143+
return fmt.Errorf("error opening zip file entry: %w", err)
144+
}
145+
defer zippedRc.Close()
146+
147+
if _, err = io.Copy(file, zippedRc); err != nil {
148+
return fmt.Errorf("error writing zip resource to local file '%s': %w", destFilePath, err)
149+
}
150+
151+
}
152+
return nil
153+
}
154+
155+
// Extract a tar archive file into the provided destination directory.
156+
func ExtractTar(filePath string, dest string) error {
157+
tarFile, err := os.Open(filePath)
158+
if err != nil {
159+
return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
160+
}
161+
defer tarFile.Close()
162+
163+
tr := tar.NewReader(tarFile)
164+
for {
165+
header, err := tr.Next()
166+
167+
if err == io.EOF {
168+
break
169+
}
170+
171+
if err != nil {
172+
return fmt.Errorf("error reading tar archive entry: %w", err)
173+
}
174+
175+
if header == nil {
176+
continue
177+
}
178+
179+
destFilePath := filepath.Join(dest, header.Name)
180+
if header.Typeflag == tar.TypeDir {
181+
err = os.MkdirAll(destFilePath, 0755)
182+
if err != nil {
183+
return fmt.Errorf("error creating new directory %s", destFilePath)
184+
}
185+
continue
186+
}
187+
188+
file, fileErr := OpenFile(destFilePath)
189+
if fileErr != nil {
190+
return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
191+
}
192+
defer file.Close()
193+
if _, err = io.Copy(file, tr); err != nil {
194+
return fmt.Errorf("error writing tar resource to local file '%s': %w", destFilePath, err)
195+
}
196+
}
197+
return nil
198+
}
199+
200+
// Extract a gzip compressed file into the provided destination file path.
201+
func ExtractGzip(filePath string, dest string) error {
202+
gzipFile, err := os.Open(filePath)
203+
if err != nil {
204+
return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
205+
}
206+
defer gzipFile.Close()
207+
gzr, err := gzip.NewReader(gzipFile)
208+
if err != nil {
209+
return fmt.Errorf("unable to create gzip reader: %w", err)
210+
}
211+
defer gzr.Close()
212+
213+
file, fileErr := OpenFile(dest)
214+
if fileErr != nil {
215+
return fmt.Errorf("unable to open local file '%s' for writing: %w", dest, fileErr)
216+
}
217+
defer file.Close()
218+
219+
if _, err = io.Copy(file, gzr); err != nil {
220+
return fmt.Errorf("error writing gzip resource to local file '%s': %w", dest, err)
221+
}
222+
223+
return nil
224+
}
225+
226+
// Get the file type based on the first few hundred bytes of the stream.
227+
// If the file isn't one of the expected formats, nil is returned.
228+
// If an error occurs while determining the file format, nil is returned.
229+
func GetFileFormat(filePath string) *FileFormat {
230+
231+
file, err := os.Open(filePath)
232+
if err != nil {
233+
return nil
234+
}
235+
defer file.Close()
236+
237+
r := bufio.NewReader(file)
238+
n := 264
239+
fileBytes, err := r.Peek(n)
240+
if err != nil {
241+
return nil
242+
}
243+
244+
for _, format := range fileFormats {
245+
if bytes.Equal(fileBytes[format.Offset:format.Offset+len(format.MagicBytes)], format.MagicBytes) {
246+
return &format
247+
}
248+
}
249+
return nil
250+
}

pullman/helpers_test.go

+191
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
// Copyright 2022 IBM Corporation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
package pullman
15+
16+
import (
17+
"archive/tar"
18+
"archive/zip"
19+
"bytes"
20+
"compress/gzip"
21+
"io/ioutil"
22+
"os"
23+
"path/filepath"
24+
"strings"
25+
"testing"
26+
27+
"github.com/stretchr/testify/assert"
28+
"sigs.k8s.io/controller-runtime/pkg/log/zap"
29+
)
30+
31+
const generatedDirectory = "generated"
32+
33+
var zipFilePath = filepath.Join(generatedDirectory, "test-archive.zip")
34+
var tarFilePath = filepath.Join(generatedDirectory, "test-archive.tar")
35+
var tarGzFilePath = filepath.Join(generatedDirectory, "test-archive.tar.gz")
36+
var log = zap.New(zap.UseDevMode(true))
37+
38+
var files = []struct {
39+
Name, Body string
40+
}{
41+
{"nested/path/file1.txt", "Foo"},
42+
{"file2.txt", "Bar"},
43+
{"file3.txt", "Fun"},
44+
}
45+
46+
func generateZip() {
47+
buf := new(bytes.Buffer)
48+
zipWriter := zip.NewWriter(buf)
49+
50+
for _, file := range files {
51+
f, err := zipWriter.Create(file.Name)
52+
if err != nil {
53+
log.Error(err, "Failed to add file to test zip file")
54+
os.Exit(1)
55+
}
56+
_, err = f.Write([]byte(file.Body))
57+
if err != nil {
58+
log.Error(err, "Failed to write file to test zip file")
59+
os.Exit(1)
60+
}
61+
}
62+
63+
if err := zipWriter.Close(); err != nil {
64+
log.Error(err, "Failed to close zip writer")
65+
}
66+
67+
writeBytes(buf.Bytes(), zipFilePath)
68+
}
69+
70+
func generateTar() {
71+
buf := new(bytes.Buffer)
72+
tarWriter := tar.NewWriter(buf)
73+
defer tarWriter.Close()
74+
75+
for _, file := range files {
76+
header := &tar.Header{
77+
Name: file.Name,
78+
Mode: 0600,
79+
Size: int64(len(file.Body)),
80+
}
81+
82+
if err := tarWriter.WriteHeader(header); err != nil {
83+
log.Error(err, "Failed to write header to test tar file")
84+
os.Exit(1)
85+
}
86+
if _, err := tarWriter.Write([]byte(file.Body)); err != nil {
87+
log.Error(err, "Failed to write header to test tar file")
88+
os.Exit(1)
89+
}
90+
}
91+
92+
if err := tarWriter.Close(); err != nil {
93+
log.Error(err, "Failed to close tar writer")
94+
}
95+
96+
writeBytes(buf.Bytes(), tarFilePath)
97+
}
98+
99+
func generateTarGz() {
100+
buf := new(bytes.Buffer)
101+
gzipWriter := gzip.NewWriter(buf)
102+
defer gzipWriter.Close()
103+
tarWriter := tar.NewWriter(gzipWriter)
104+
defer tarWriter.Close()
105+
106+
for _, file := range files {
107+
header := &tar.Header{
108+
Name: file.Name,
109+
Mode: 0600,
110+
Size: int64(len(file.Body)),
111+
}
112+
113+
if err := tarWriter.WriteHeader(header); err != nil {
114+
log.Error(err, "Failed to write header to test tar.gz file")
115+
os.Exit(1)
116+
}
117+
if _, err := tarWriter.Write([]byte(file.Body)); err != nil {
118+
log.Error(err, "Failed to write header to test tar.gz file")
119+
os.Exit(1)
120+
}
121+
}
122+
123+
if err := gzipWriter.Close(); err != nil {
124+
log.Error(err, "Failed to close gzip writer")
125+
}
126+
127+
writeBytes(buf.Bytes(), tarGzFilePath)
128+
}
129+
130+
func writeBytes(bytes []byte, outputPath string) {
131+
if err := os.MkdirAll(filepath.Dir(outputPath), os.ModePerm); err != nil {
132+
log.Error(err, "Failed to create archive parent directories")
133+
os.Exit(1)
134+
}
135+
136+
if err := ioutil.WriteFile(outputPath, bytes, 0777); err != nil {
137+
log.Error(err, "Failed to write archive file to disk")
138+
os.Exit(1)
139+
}
140+
}
141+
142+
func tearDown() {
143+
os.RemoveAll(generatedDirectory)
144+
}
145+
146+
func Test_ExtractZip(t *testing.T) {
147+
generateZip()
148+
defer tearDown()
149+
150+
err := ExtractZip(zipFilePath, generatedDirectory)
151+
assert.NoError(t, err)
152+
153+
for _, file := range files {
154+
contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name))
155+
assert.NoError(t, err)
156+
assert.Equal(t, file.Body, string(contents))
157+
}
158+
}
159+
160+
func Test_ExtractTar(t *testing.T) {
161+
generateTar()
162+
defer tearDown()
163+
164+
err := ExtractTar(tarFilePath, generatedDirectory)
165+
assert.NoError(t, err)
166+
167+
for _, file := range files {
168+
contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name))
169+
assert.NoError(t, err)
170+
assert.Equal(t, file.Body, string(contents))
171+
}
172+
173+
}
174+
175+
func Test_ExtractTarGz(t *testing.T) {
176+
generateTarGz()
177+
defer tearDown()
178+
179+
newFilePath := strings.TrimSuffix(tarGzFilePath, ".gz")
180+
err := ExtractGzip(tarGzFilePath, newFilePath)
181+
assert.NoError(t, err)
182+
err = ExtractTar(newFilePath, generatedDirectory)
183+
assert.NoError(t, err)
184+
185+
for _, file := range files {
186+
contents, err := os.ReadFile(filepath.Join(generatedDirectory, file.Name))
187+
assert.NoError(t, err)
188+
assert.Equal(t, file.Body, string(contents))
189+
}
190+
191+
}

0 commit comments

Comments
 (0)