Skip to content

Commit a5117e1

Browse files
committed
[PROD-17859] Properly handle opaque whiteout entries in Docker layer flattener
Flattened version of upstream PR google#110
1 parent 5dc1ab0 commit a5117e1

File tree

3 files changed

+193
-6
lines changed

3 files changed

+193
-6
lines changed

BUILD.bazel

+11
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,14 @@ sh_test(
101101
":pusher.par",
102102
],
103103
)
104+
105+
# Unit tests for the v2.2 client (client_v2_2_unit_tests.py).
py_test(
    name = "client_v2_2_unit_tests",
    size = "large",
    srcs = ["client_v2_2_unit_tests.py"],
    main = "client_v2_2_unit_tests.py",
    # The library under test is a dependency of the test, not one of its
    # source files; depending on it also pulls in whatever it depends on.
    # NOTE(review): assumes :containerregistry is a py_library -- confirm.
    deps = [":containerregistry"],
)
114+

client/v2_2/docker_image_.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -697,20 +697,18 @@ def __exit__(self, unused_type, unused_value, unused_traceback):
697697
pass
698698

699699

700-
def _in_whiteout_dir(
701-
fs,
702-
name
703-
):
700+
def _in_whiteout_dir(fs, opaque_whiteouts, name):
704701
while name:
705702
dirname = os.path.dirname(name)
706703
if name == dirname:
707704
break
708-
if fs.get(dirname):
705+
if fs.get(dirname) or dirname in opaque_whiteouts:
709706
return True
710707
name = dirname
711708
return False
712709

713710
_WHITEOUT_PREFIX = '.wh.'
711+
_OPAQUE_WHITEOUT_FILENAME = '.wh..wh..opq'
714712

715713

716714
def extract(image, tar):
@@ -724,17 +722,27 @@ def extract(image, tar):
724722
# to whether they are a tombstone or not.
725723
fs = {}
726724

725+
opaque_whiteouts_in_higher_layers = set()
726+
727727
# Walk the layers, topmost first and add files. If we've seen them in a
728728
# higher layer then we skip them
729729
for layer in image.diff_ids():
730730
buf = cStringIO.StringIO(image.uncompressed_layer(layer))
731731
with tarfile.open(mode='r:', fileobj=buf) as layer_tar:
732+
opaque_whiteouts_in_this_layer = []
732733
for tarinfo in layer_tar:
733734
# If we see a whiteout file, then don't add anything to the tarball
734735
# but ensure that any lower layers don't add a file with the whited
735736
# out name.
736737
basename = os.path.basename(tarinfo.name)
737738
dirname = os.path.dirname(tarinfo.name)
739+
740+
# If we see an opaque whiteout file, then don't add anything to the
741+
# tarball but ensure that any lower layers don't add files or
742+
# directories which are siblings of the whiteout file.
743+
if basename == _OPAQUE_WHITEOUT_FILENAME:
744+
opaque_whiteouts_in_this_layer.append(dirname)
745+
738746
tombstone = basename.startswith(_WHITEOUT_PREFIX)
739747
if tombstone:
740748
basename = basename[len(_WHITEOUT_PREFIX):]
@@ -746,7 +754,7 @@ def extract(image, tar):
746754
continue
747755

748756
# Check for a whited out parent directory
749-
if _in_whiteout_dir(fs, name):
757+
if _in_whiteout_dir(fs, opaque_whiteouts_in_higher_layers, name):
750758
continue
751759

752760
# Mark this file as handled by adding its name.
@@ -758,3 +766,4 @@ def extract(image, tar):
758766
tar.addfile(tarinfo, fileobj=layer_tar.extractfile(tarinfo))
759767
else:
760768
tar.addfile(tarinfo, fileobj=None)
769+
opaque_whiteouts_in_higher_layers.update(opaque_whiteouts_in_this_layer)

client_v2_2_unit_tests.py

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# Copyright 2018 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from collections import OrderedDict
16+
import io
17+
from StringIO import StringIO
18+
import tarfile
19+
import unittest
20+
21+
from containerregistry.client.v2_2 import docker_image as v2_2_image
22+
23+
24+
class MockImage(object):
25+
"""Mock of DockerImage, implementing only the methods called by extract()."""
26+
27+
def __init__(self):
28+
self._fs_layers = OrderedDict()
29+
30+
def add_layer(self, filenames):
31+
"""Add a layer to the image.
32+
33+
Args:
34+
filenames: a list of filenames or (filename, content) pairs. Filenames
35+
with trailing slashes become directory entries in the generated tar
36+
"""
37+
buf = io.BytesIO()
38+
with tarfile.open(mode='w:', fileobj=buf) as tf:
39+
for entry in filenames:
40+
if (isinstance(entry, basestring)):
41+
name = entry
42+
content = ""
43+
else:
44+
(name, content) = entry
45+
tarinfo = tarfile.TarInfo(name)
46+
tarinfo.size = len(content)
47+
if name.endswith("/"):
48+
tarinfo.type = tarfile.DIRTYPE
49+
tf.addfile(tarinfo, fileobj=(StringIO(content) if content else None))
50+
buf.seek(0)
51+
new_layer_id = str(len(self._fs_layers))
52+
self._fs_layers[new_layer_id] = buf.getvalue()
53+
54+
def diff_ids(self):
55+
return reversed(self._fs_layers.keys())
56+
57+
def uncompressed_layer(self, layer_id):
58+
return self._fs_layers[layer_id]
59+
60+
61+
class TestExtract(unittest.TestCase):
62+
63+
def _test_flatten(self, layer_filenames, expected_flattened_output):
64+
# Construct a mock DockerImage with the specified layers:
65+
img = MockImage()
66+
for filenames in layer_filenames:
67+
img.add_layer(filenames)
68+
buf = io.BytesIO()
69+
70+
# Run the actual extract logic:
71+
with tarfile.open(mode='w:', fileobj=buf) as tar:
72+
v2_2_image.extract(img, tar)
73+
74+
# Compare the extract() output to the expected results:
75+
buf.seek(0)
76+
flattened_output = []
77+
with tarfile.open(mode='r', fileobj=buf) as tar:
78+
for tarinfo in tar:
79+
if tarinfo.isdir():
80+
flattened_output.append(tarinfo.name + "/")
81+
else:
82+
contents = tar.extractfile(tarinfo).read()
83+
if contents:
84+
flattened_output.append((tarinfo.name, contents))
85+
else:
86+
flattened_output.append(tarinfo.name)
87+
self.assertEqual(flattened_output, expected_flattened_output)
88+
89+
def test_single_layer(self):
90+
self._test_flatten(
91+
[["/directory/", "/file"]],
92+
["/directory/", "/file"]
93+
)
94+
95+
def test_purely_additive_layers(self):
96+
self._test_flatten(
97+
[
98+
["dir/", "dir/file1", "file"],
99+
["dir/file2", "file2"]
100+
],
101+
["dir/file2", "file2", "dir/", "dir/file1", "file"]
102+
)
103+
104+
def test_highest_layer_of_file_takes_precedence(self):
105+
self._test_flatten(
106+
[
107+
[("file", "a")],
108+
[("file", "b")]
109+
],
110+
[("file", "b")]
111+
)
112+
113+
def test_single_file_whiteout(self):
114+
self._test_flatten(
115+
[
116+
["/foo"],
117+
["/.wh.foo"]
118+
],
119+
[]
120+
)
121+
122+
def test_parent_directory_whiteout(self):
123+
self._test_flatten(
124+
[
125+
["/x/a/", "/x/b/", "/x/b/1"],
126+
["/x/.wh.b"]
127+
],
128+
["/x/a/"]
129+
)
130+
131+
def test_opaque_whiteout(self):
132+
# Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
133+
self._test_flatten(
134+
[
135+
["a/", "a/b/", "a/b/c/", "a/b/c/bar"],
136+
["a/", "a/.wh..wh..opq", "a/b/", "a/b/c/", "a/b/c/foo"],
137+
],
138+
["a/", "a/b/", "a/b/c/", "a/b/c/foo"],
139+
)
140+
141+
self._test_flatten(
142+
[
143+
["a/", "a/b/", "a/b/c/", "a/b/c/bar"],
144+
["a/", "a/b/", "a/b/c/", "a/b/c/foo", "a/.wh..wh..opq"],
145+
],
146+
["a/", "a/b/", "a/b/c/", "a/b/c/foo"],
147+
)
148+
149+
def test_opaque_whiteout_preserves_parent_directory(self):
150+
# Example from https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
151+
self._test_flatten(
152+
[
153+
[
154+
"bin/",
155+
"bin/my-app-binary",
156+
"bin/my-app-tools",
157+
"bin/tools/",
158+
"bin/tools/my-app-tool-one"
159+
],
160+
["bin/.wh..wh..opq"],
161+
],
162+
["bin/"],
163+
)
164+
165+
166+
if __name__ == "__main__":
167+
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)