Skip to content

Commit 5eb788f

Browse files
committed
Fix a cornercase in choosing whether to copy or link a file.
In the present code, when we have two separate links to the same file outside of /etc/pki, the real file is copied to the location of both links. This means that when the user makes changes to the file in the future, they will have to edit both files whereas before they would only have had to edit one of the files (since they linked to the same underlying file). Example: Original: lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileA -> /etc/sourceFile lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/sourceFile Becomes: -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileA -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileB This change makes it so the first link is copied and the second link links to that copy like this: -rw-r--r--. 1 badger badger 12 Jan 22 03:43 fileA lrwxrwxrwx. 1 badger badger 13 Jan 22 03:42 fileB -> /etc/pki/fileA
1 parent 41321a1 commit 5eb788f

File tree

2 files changed

+128
-93
lines changed

2 files changed

+128
-93
lines changed

repos/system_upgrade/common/actors/targetuserspacecreator/libraries/userspacegen.py

Lines changed: 92 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,62 @@ def _mkdir_with_copied_mode(path, mode_from):
348348
run(['chmod', '--reference={}'.format(mode_from), path])
349349

350350

351+
def _canonize(pointee_as_abspath, srcdir):
352+
"""
353+
Helper function to return canonical forms of directory paths.
354+
355+
This is an ugly function that just avoids having to duplicate this code in two places.
356+
"""
357+
# If srcdir is a symlink, then we need a name for it that we can compare
358+
# with other paths.
359+
canonical_srcdir = os.path.realpath(srcdir)
360+
361+
# To make comparisons, we need to resolve all symlinks in the directory
362+
# structure leading up to pointee. However, we can't include pointee
363+
# itself otherwise it will resolve to the file that it points to in the
364+
# end (which would be wrong if pointee_filename is a symlink).
365+
canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath)
366+
canonical_pointee_dir = os.path.realpath(canonical_pointee_dir)
367+
368+
return (pointee_filename, canonical_pointee_dir, canonical_srcdir)
369+
370+
371+
def _format_source_link(pointee_as_abspath, symlink, srcdir):
372+
"""
373+
Format the source for a symlink as either an absolute path or a path relative to srcdir.
374+
375+
:param pointee_as_abspath: Sbsolute path to the source of the symlink.
376+
:param symlink: Absolute path to the symlink that started us looking
377+
through the chain of links. If the symlink points to a relative path, this function will
378+
return a relative path. If it is an absolute path, it will return an absolute path.
379+
:param srcdir: The directory in the source tree that we want everything to live within.
380+
"""
381+
pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir)
382+
link_to = os.readlink(symlink)
383+
384+
if link_to.startswith('/'):
385+
# The original symlink was an absolute path so we will set this
386+
# one to absolute too
387+
# Note: Because absolute paths are constructed inside of
388+
# srcdir, the relative path that we need to join with has to be
389+
# relative to srcdir, not the directory that the symlink is
390+
# being created in.
391+
relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir)
392+
corrected_path = os.path.normpath(os.path.join(srcdir,
393+
relative_to_srcdir,
394+
pointee_filename)
395+
)
396+
else:
397+
# If the original link is a relative link, then we want the new
398+
# link to be relative (to the path that we will place the symlink in) as well
399+
canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink))
400+
relative_to_symlink_location = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir)
401+
corrected_path = os.path.normpath(os.path.join(relative_to_symlink_location,
402+
pointee_filename)
403+
)
404+
return corrected_path
405+
406+
351407
def _choose_copy_or_link(symlink, srcdir):
352408
"""
353409
Determine whether to copy file contents or create a symlink depending on where the pointee resides.
@@ -378,10 +434,6 @@ def _choose_copy_or_link(symlink, srcdir):
378434
if not os.path.exists(symlink):
379435
raise BrokenSymlinkError('File {} is a broken symlink!'.format(symlink))
380436

381-
# If srcdir is a symlink, then we need a name for it that we can compare
382-
# with other paths.
383-
canonical_srcdir = os.path.realpath(srcdir)
384-
385437
pointee_as_abspath = symlink
386438
seen = set([pointee_as_abspath])
387439

@@ -412,43 +464,17 @@ def _choose_copy_or_link(symlink, srcdir):
412464

413465
seen.add(pointee_as_abspath)
414466

415-
# To make comparisons, we need to resolve all symlinks in the directory
416-
# structure leading up to pointee. However, we can't include pointee
417-
# itself otherwise it will resolve to the file that it points to in the
418-
# end (which would be wrong if pointee_filename is a symlink).
419-
canonical_pointee_dir, pointee_filename = os.path.split(pointee_as_abspath)
420-
canonical_pointee_dir = os.path.realpath(canonical_pointee_dir)
421-
422-
if canonical_pointee_dir.startswith(canonical_srcdir):
423-
# Absolute path inside of the correct dir so we need to link to it
424-
# But we need to determine what the link path should be before
425-
# returning.
426-
427-
# Construct a relative path that points from the symlinks directory
428-
# to the pointee.
429-
link_to = os.readlink(symlink)
430-
canonical_symlink_dir = os.path.realpath(os.path.dirname(symlink))
431-
relative_path = os.path.relpath(canonical_pointee_dir, canonical_symlink_dir)
432-
433-
if link_to.startswith('/'):
434-
# The original symlink was an absolute path so we will set this
435-
# one to absolute too
436-
# Note: Because absolute paths are constructed inside of
437-
# srcdir, the relative path that we need to join here has to be
438-
# relative to srcdir, not the directory that the symlink is
439-
# being created in.
440-
relative_to_srcdir = os.path.relpath(canonical_pointee_dir, canonical_srcdir)
441-
corrected_path = os.path.normpath(os.path.join(srcdir, relative_to_srcdir, pointee_filename))
442-
443-
else:
444-
# If the original link is a relative link, then we want the new
445-
# link to be relative as well
446-
corrected_path = os.path.normpath(os.path.join(relative_path, pointee_filename))
447-
448-
return ("link", corrected_path)
467+
dummy_pointee_filename, canonical_pointee_dir, canonical_srcdir = _canonize(pointee_as_abspath, srcdir)
468+
if not canonical_pointee_dir.startswith(canonical_srcdir):
469+
# pointee is a symlink that points outside of the srcdir so continue to
470+
# the next symlink in the chain.
471+
continue
449472

450-
# pointee is a symlink that points outside of the srcdir so continue to
451-
# the next symlink in the chain.
473+
# Absolute path inside of the correct dir so we need to link to it
474+
# Note: We have to make the source of the link relative or absolute
475+
# depending on what the original link was
476+
corrected_path = _format_source_link(pointee_as_abspath, symlink, srcdir)
477+
return ("link", corrected_path)
452478

453479
# The file is not a link so copy it
454480
return ('copy', pointee_as_abspath)
@@ -464,6 +490,7 @@ def _copy_symlinks(symlinks_to_process, srcdir):
464490
:param srcdir: The root directory that every piece of content must be present in.
465491
:raises ValueError: if the arguments are not correct
466492
"""
493+
copied_links = {}
467494
for source_linkpath, target_linkpath in symlinks_to_process:
468495
try:
469496
action, source_path = _choose_copy_or_link(source_linkpath, srcdir)
@@ -473,14 +500,32 @@ def _copy_symlinks(symlinks_to_process, srcdir):
473500
continue
474501

475502
if action == "copy":
476-
# Note: source_path could be a directory, so '-a' or '-r' must be
477-
# given to cp.
478-
run(['cp', '-a', source_path, target_linkpath])
479-
elif action == 'link':
503+
# Need the canonical path so that we can check if the pointed to
504+
# file has been copied before
505+
canonical_source_path = os.path.realpath(source_path)
506+
if canonical_source_path in copied_links:
507+
# The link was copied before so we need to change this to a link action
508+
# with the previous copied link location as the source for this link
509+
action = "link"
510+
source_abspath = os.path.abspath(os.path.normpath(copied_links[canonical_source_path]))
511+
source_path = _format_source_link(source_abspath, source_linkpath, srcdir)
512+
else:
513+
# All other links to the same target will need to link to the path
514+
# that we're processing now since they all pointed to the same
515+
# target before.
516+
copied_links[canonical_source_path] = source_linkpath
517+
518+
# Note: source_path could be a directory, so '-a' or '-r' must be
519+
# given to cp.
520+
run(['cp', '-a', source_path, target_linkpath])
521+
continue
522+
523+
if action == 'link':
480524
run(["ln", "-s", source_path, target_linkpath])
481-
else:
482-
# This will not happen unless _copy_or_link() has a bug.
483-
raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action))
525+
continue
526+
527+
# This will not happen unless _copy_or_link() has a bug.
528+
raise RuntimeError("Programming error: _copy_or_link() returned an unknown action:{}".format(action))
484529

485530

486531
def _copy_decouple(srcdir, dstdir):

repos/system_upgrade/common/actors/targetuserspacecreator/tests/unit_test_targetuserspacecreator.py

Lines changed: 36 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -381,29 +381,24 @@ def temp_directory_layout(tmp_path, initial_structure):
381381
},
382382
id="Absolute_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir"
383383
)),
384-
# This should be fixed but not necessarily for this release.
385-
# It makes sure that when we have two separate links to the
386-
# same file outside of /etc/pki, one of the links is copied
387-
# as a real file and the other is made a link to the copy.
388-
# (Right now, the real file is copied in place of both links.)
389-
# (pytest.param(
390-
# {
391-
# 'dir': {
392-
# 'fileA': '/outside/fileC',
393-
# 'fileB': '/outside/fileC',
394-
# },
395-
# 'outside': {
396-
# 'fileC': None,
397-
# },
398-
# },
399-
# {
400-
# 'dir': {
401-
# 'fileA': None,
402-
# 'fileB': '/dir/fileA',
403-
# },
404-
# },
405-
# id="Absolute_two_symlinks_to_the_same_copied_file"
406-
# )),
384+
(pytest.param(
385+
{
386+
'dir': {
387+
'fileA': '/outside/fileC',
388+
'fileB': '/outside/fileC',
389+
},
390+
'outside': {
391+
'fileC': None,
392+
},
393+
},
394+
{
395+
'dir': {
396+
'fileA': None,
397+
'fileB': '/dir/fileA',
398+
},
399+
},
400+
id="Absolute_two_symlinks_to_the_same_copied_file"
401+
)),
407402
(pytest.param(
408403
{
409404
'dir': {
@@ -735,29 +730,24 @@ def temp_directory_layout(tmp_path, initial_structure):
735730
},
736731
id="Relative_symlink_to_a_file_inside_via_a_symlink_to_the_rootdir"
737732
)),
738-
# This should be fixed but not necessarily for this release.
739-
# It makes sure that when we have two separate links to the
740-
# same file outside of /etc/pki, one of the links is copied
741-
# as a real file and the other is made a link to the copy.
742-
# (Right now, the real file is copied in place of both links.)
743-
# (pytest.param(
744-
# {
745-
# 'dir': {
746-
# 'fileA': '../outside/fileC',
747-
# 'fileB': '../outside/fileC',
748-
# },
749-
# 'outside': {
750-
# 'fileC': None,
751-
# },
752-
# },
753-
# {
754-
# 'dir': {
755-
# 'fileA': None,
756-
# 'fileB': 'fileA',
757-
# },
758-
# },
759-
# id="Relative_two_symlinks_to_the_same_copied_file"
760-
# )),
733+
(pytest.param(
734+
{
735+
'dir': {
736+
'fileA': '../outside/fileC',
737+
'fileB': '../outside/fileC',
738+
},
739+
'outside': {
740+
'fileC': None,
741+
},
742+
},
743+
{
744+
'dir': {
745+
'fileA': None,
746+
'fileB': 'fileA',
747+
},
748+
},
749+
id="Relative_two_symlinks_to_the_same_copied_file"
750+
)),
761751
(pytest.param(
762752
{
763753
'dir': {

0 commit comments

Comments
 (0)