diff --git a/drivers/aufs/aufs.go b/drivers/aufs/aufs.go index 964ba8c918..df1c90f588 100644 --- a/drivers/aufs/aufs.go +++ b/drivers/aufs/aufs.go @@ -36,6 +36,7 @@ import ( "time" graphdriver "github.com/containers/storage/drivers" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chrootarchive" "github.com/containers/storage/pkg/directory" @@ -781,3 +782,14 @@ func (a *Driver) SupportsShifting(uidmap, gidmap []idtools.IDMap) bool { func (a *Driver) Dedup(req graphdriver.DedupArgs) (graphdriver.DedupResult, error) { return graphdriver.DedupResult{}, nil } + +// DeferredRemove is not implemented. +// It calls Remove directly. +func (a *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + return nil, a.Remove(id) +} + +// GetTempDirRootDir is not implemented. +func (a *Driver) GetTempDirRootDir() string { + return "" +} diff --git a/drivers/btrfs/btrfs.go b/drivers/btrfs/btrfs.go index 4a80339f40..df256de53f 100644 --- a/drivers/btrfs/btrfs.go +++ b/drivers/btrfs/btrfs.go @@ -30,6 +30,7 @@ import ( "unsafe" graphdriver "github.com/containers/storage/drivers" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/fileutils" "github.com/containers/storage/pkg/idtools" @@ -678,3 +679,14 @@ func (d *Driver) AdditionalImageStores() []string { func (d *Driver) Dedup(req graphdriver.DedupArgs) (graphdriver.DedupResult, error) { return graphdriver.DedupResult{}, nil } + +// DeferredRemove is not implemented. +// It calls Remove directly. +func (d *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + return nil, d.Remove(id) +} + +// GetTempDirRootDir is not implemented. +func (d *Driver) GetTempDirRootDir() string { + return "" +} diff --git a/drivers/driver.go b/drivers/driver.go index 24d7b66b08..72346bb2f8 100644 --- a/drivers/driver.go +++ b/drivers/driver.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/containers/storage/internal/dedup" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/fileutils" @@ -124,6 +125,13 @@ type ProtoDriver interface { CreateFromTemplate(id, template string, templateIDMappings *idtools.IDMappings, parent string, parentIDMappings *idtools.IDMappings, opts *CreateOpts, readWrite bool) error // Remove attempts to remove the filesystem layer with this id. Remove(id string) error + // DeferredRemove is used to remove the filesystem layer with this id. + // This removal happen immediately (the layer is no longer usable), + // but physically deleting the files may be deferred. + // Caller MUST call returned Cleanup function. + DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) + // GetTempDirRootDir returns the root directory for temporary directories. + GetTempDirRootDir() string // Get returns the mountpoint for the layered filesystem referred // to by this id. You can optionally specify a mountLabel or "". // Optionally it gets the mappings used to create the layer. diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index a01d6b3698..6f1b65aa05 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -24,6 +24,7 @@ import ( "github.com/containers/storage/drivers/quota" "github.com/containers/storage/internal/dedup" "github.com/containers/storage/internal/staging_lockfile" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chrootarchive" "github.com/containers/storage/pkg/directory" @@ -1305,17 +1306,24 @@ func (d *Driver) optsAppendMappings(opts string, uidMaps, gidMaps []idtools.IDMa // Remove cleans the directories that are created for this id. func (d *Driver) Remove(id string) error { + return d.removeCommon(id, func(path string) error { + return system.EnsureRemoveAll(path) + }) +} + +func (d *Driver) removeCommon(id string, cleanup func(string) error) error { dir := d.dir(id) lid, err := os.ReadFile(path.Join(dir, "link")) if err == nil { - if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil { + linkPath := path.Join(d.home, linkDir, string(lid)) + if err := cleanup(linkPath); err != nil { logrus.Debugf("Failed to remove link: %v", err) } } d.releaseAdditionalLayerByID(id) - if err := system.EnsureRemoveAll(dir); err != nil && !os.IsNotExist(err) { + if err := cleanup(dir); err != nil && !os.IsNotExist(err) { return err } if d.quotaCtl != nil { @@ -1327,6 +1335,24 @@ func (d *Driver) Remove(id string) error { return nil } +func (d *Driver) GetTempDirRootDir() string { + return filepath.Join(d.home, stagingDir) +} + +func (d *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + t, err := tempdir.NewTempDir(d.GetTempDirRootDir()) + if err != nil { + return nil, err + } + + if err := d.removeCommon(id, func(path string) error { + return t.Add(path) + }); err != nil { + return t.Cleanup, fmt.Errorf("failed to add to stage directory: %w", err) + } + return t.Cleanup, nil +} + // recreateSymlinks goes through the driver's home directory and checks if the diff directory // under each layer has a symlink created for it under the linkDir. If the symlink does not // exist, it creates them @@ -1353,8 +1379,8 @@ func (d *Driver) recreateSymlinks() error { // Check that for each layer, there's a link in "l" with the name in // the layer's "link" file that points to the layer's "diff" directory. for _, dir := range dirs { - // Skip over the linkDir and anything that is not a directory - if dir.Name() == linkDir || !dir.IsDir() { + // Skip over the linkDir and anything that is not a directory or tempDir + if dir.Name() == linkDir || !dir.IsDir() || dir.Name() == stagingDir { continue } // Read the "link" file under each layer to get the name of the symlink diff --git a/drivers/vfs/driver.go b/drivers/vfs/driver.go index 87ff885ec3..f4ec216f1c 100644 --- a/drivers/vfs/driver.go +++ b/drivers/vfs/driver.go @@ -11,6 +11,7 @@ import ( graphdriver "github.com/containers/storage/drivers" "github.com/containers/storage/internal/dedup" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/fileutils" @@ -244,6 +245,21 @@ func (d *Driver) Remove(id string) error { return system.EnsureRemoveAll(d.dir(id)) } +func (d *Driver) GetTempDirRootDir() string { + return filepath.Join(d.home, "tempdirs") +} + +func (d *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + t, err := tempdir.NewTempDir(d.GetTempDirRootDir()) + if err != nil { + return nil, err + } + if err := t.Add(d.dir(id)); err != nil { + return nil, err + } + return t.Cleanup, nil +} + // Get returns the directory for the given id. func (d *Driver) Get(id string, options graphdriver.MountOpts) (_ string, retErr error) { dir := d.dir(id) diff --git a/drivers/windows/windows.go b/drivers/windows/windows.go index 6a5c9bcd1b..ac7f815371 100644 --- a/drivers/windows/windows.go +++ b/drivers/windows/windows.go @@ -24,6 +24,7 @@ import ( "github.com/Microsoft/go-winio/backuptar" "github.com/Microsoft/hcsshim" graphdriver "github.com/containers/storage/drivers" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/fileutils" @@ -1014,3 +1015,14 @@ func parseStorageOpt(storageOpt map[string]string) (*storageOptions, error) { } return &options, nil } + +// DeferredRemove is not implemented. +// It calls Remove directly. +func (d *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + return nil, d.Remove(id) +} + +// GetTempDirRootDir is not implemented. +func (d *Driver) GetTempDirRootDir() string { + return "" +} diff --git a/drivers/zfs/zfs.go b/drivers/zfs/zfs.go index f53b0e1b61..630036f2dc 100644 --- a/drivers/zfs/zfs.go +++ b/drivers/zfs/zfs.go @@ -13,6 +13,7 @@ import ( "time" graphdriver "github.com/containers/storage/drivers" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/pkg/mount" @@ -406,6 +407,12 @@ func (d *Driver) Remove(id string) error { return nil } +// DeferredRemove is not implemented. +// It calls Remove directly. +func (d *Driver) DeferredRemove(id string) (tempdir.CleanupTempDirFunc, error) { + return nil, d.Remove(id) +} + // Get returns the mountpoint for the given id after creating the target directories if necessary. func (d *Driver) Get(id string, options graphdriver.MountOpts) (_ string, retErr error) { mountpoint := d.mountPath(id) @@ -516,3 +523,8 @@ func (d *Driver) AdditionalImageStores() []string { func (d *Driver) Dedup(req graphdriver.DedupArgs) (graphdriver.DedupResult, error) { return graphdriver.DedupResult{}, nil } + +// GetTempDirRootDir is not implemented. +func (d *Driver) GetTempDirRootDir() string { + return "" +} diff --git a/internal/tempdir/tempdir.go b/internal/tempdir/tempdir.go new file mode 100644 index 0000000000..bb52a22039 --- /dev/null +++ b/internal/tempdir/tempdir.go @@ -0,0 +1,243 @@ +package tempdir + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/containers/storage/internal/staging_lockfile" + "github.com/sirupsen/logrus" +) + +/* +Locking rules and invariants for TempDir and its recovery mechanism: + +1. TempDir Instance Locks: + - Path: 'RootDir/lock-XYZ' (in the root directory) + - Each TempDir instance creates and holds an exclusive lock on this file immediately + during NewTempDir() initialization. + - This lock signifies that the temporary directory is in active use by the + process/goroutine that holds the TempDir object. + +2. Stale Directory Recovery (separate operation): + - RecoverStaleDirs() can be called independently to identify and clean up stale + temporary directories. + - For each potential stale directory (found by listPotentialStaleDirs), it + attempts to TryLockPath() its instance lock file. + - If TryLockPath() succeeds: The directory is considered stale, and both the + directory and lock file are removed. + - If TryLockPath() fails: The directory is considered in active use by another + process/goroutine, and it's skipped. + +3. TempDir Usage: + - NewTempDir() immediately creates both the instance lock and the temporary directory. + - TempDir.Add() moves files into the existing temporary directory with counter-based naming. + - Files moved into the temporary directory are renamed with a counter-based prefix + to ensure uniqueness (e.g., "0-filename", "1-filename"). + - Once cleaned up, the TempDir instance cannot be reused - Add() will return an error. + +4. Cleanup Process: + - TempDir.Cleanup() removes both the temporary directory and its lock file. + - The instance lock is unlocked and deleted after cleanup operations are complete. + - The TempDir instance becomes inactive after cleanup (internal fields are reset). + - The TempDir instance cannot be reused after Cleanup() - Add() will fail. + +5. TempDir Lifetime: + - NewTempDir() creates both the TempDir manager and the actual temporary directory immediately. + - The temporary directory is created eagerly during NewTempDir(). + - During its lifetime, the temporary directory is protected by its instance lock. + - The temporary directory exists until Cleanup() is called, which removes both + the directory and its lock file. + - Multiple TempDir instances can coexist in the same RootDir, each with its own + unique subdirectory and lock. + - After cleanup, the TempDir instance cannot be reused. + +6. Example Directory Structure: + + RootDir/ + lock-ABC (instance lock for temp-dir-ABC) + temp-dir-ABC/ + 0-file1 + 1-file3 + lock-XYZ (instance lock for temp-dir-XYZ) + temp-dir-XYZ/ + 0-file2 +*/ +const ( + // tempDirPrefix is the prefix used for creating temporary directories. + tempDirPrefix = "temp-dir-" + // tempdirLockPrefix is the prefix used for creating lock files for temporary directories. + tempdirLockPrefix = "lock-" +) + +// TempDir represents a temporary directory that is created in a specified root directory. +// It manages the lifecycle of the temporary directory, including creation, locking, and cleanup. +// Each TempDir instance is associated with a unique subdirectory in the root directory. +// Warning: The TempDir instance should be used in a single goroutine. +type TempDir struct { + RootDir string + + tempDirPath string + // tempDirLock is a lock file (e.g., RootDir/lock-XYZ) specific to this + // TempDir instance, indicating it's in active use. + tempDirLock *staging_lockfile.StagingLockFile + tempDirLockPath string + + // counter is used to generate unique filenames for added files. + counter uint64 +} + +// CleanupTempDirFunc is a function type that can be returned by operations +// which need to perform cleanup actions later. +type CleanupTempDirFunc func() error + +// listPotentialStaleDirs scans the RootDir for directories that might be stale temporary directories. +// It identifies directories with the tempDirPrefix and their corresponding lock files with the tempdirLockPrefix. +// The function returns a map of IDs that correspond to both directories and lock files found. +// These IDs are extracted from the filenames by removing their respective prefixes. +func listPotentialStaleDirs(rootDir string) (map[string]struct{}, error) { + ids := make(map[string]struct{}) + + dirContent, err := os.ReadDir(rootDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("error reading temp dir %s: %w", rootDir, err) + } + + for _, entry := range dirContent { + if id, ok := strings.CutPrefix(entry.Name(), tempDirPrefix); ok { + ids[id] = struct{}{} + continue + } + + if id, ok := strings.CutPrefix(entry.Name(), tempdirLockPrefix); ok { + ids[id] = struct{}{} + } + } + return ids, nil +} + +// RecoverStaleDirs identifies and removes stale temporary directories in the root directory. +// A directory is considered stale if its lock file can be acquired (indicating no active use). +// The function attempts to remove both the directory and its lock file. +// If a directory's lock cannot be acquired, it is considered in use and is skipped. +func RecoverStaleDirs(rootDir string) error { + potentialStaleDirs, err := listPotentialStaleDirs(rootDir) + if err != nil { + return fmt.Errorf("error listing potential stale temp dirs in %s: %w", rootDir, err) + } + + if len(potentialStaleDirs) == 0 { + return nil + } + + var recoveryErrors []error + + for id := range potentialStaleDirs { + lockPath := filepath.Join(rootDir, tempdirLockPrefix+id) + tempDirPath := filepath.Join(rootDir, tempDirPrefix+id) + + // Try to lock the lock file. If it can be locked, the directory is stale. + instanceLock, err := staging_lockfile.TryLockPath(lockPath) + if err != nil { + continue + } + + if rmErr := os.RemoveAll(tempDirPath); rmErr != nil && !os.IsNotExist(rmErr) { + recoveryErrors = append(recoveryErrors, fmt.Errorf("error removing stale temp dir %s: %w", tempDirPath, rmErr)) + } + if unlockErr := instanceLock.UnlockAndDelete(); unlockErr != nil { + recoveryErrors = append(recoveryErrors, fmt.Errorf("error unlocking and deleting stale lock file %s: %w", lockPath, unlockErr)) + } + } + + return errors.Join(recoveryErrors...) +} + +// NewTempDir creates a TempDir and immediately creates both the temporary directory +// and its corresponding lock file in the specified RootDir. +// The RootDir itself will be created if it doesn't exist. +// Note: The caller MUST ensure that returned TempDir instance is cleaned up with .Cleanup(). +func NewTempDir(rootDir string) (*TempDir, error) { + if err := os.MkdirAll(rootDir, 0o700); err != nil { + return nil, fmt.Errorf("creating root temp directory %s failed: %w", rootDir, err) + } + + td := &TempDir{ + RootDir: rootDir, + } + tempDirLock, tempDirLockFileName, err := staging_lockfile.CreateAndLock(td.RootDir, tempdirLockPrefix) + if err != nil { + return nil, fmt.Errorf("creating and locking temp dir instance lock in %s failed: %w", td.RootDir, err) + } + td.tempDirLock = tempDirLock + td.tempDirLockPath = filepath.Join(td.RootDir, tempDirLockFileName) + + // Create the temporary directory that corresponds to the lock file + id := strings.TrimPrefix(tempDirLockFileName, tempdirLockPrefix) + actualTempDirPath := filepath.Join(td.RootDir, tempDirPrefix+id) + if err := os.MkdirAll(actualTempDirPath, 0o700); err != nil { + return nil, fmt.Errorf("creating temp directory %s failed: %w", actualTempDirPath, err) + } + td.tempDirPath = actualTempDirPath + td.counter = 0 + return td, nil +} + +// Add moves the specified file into the instance's temporary directory. +// The temporary directory must already exist (created during NewTempDir). +// Files are renamed with a counter-based prefix (e.g., "0-filename", "1-filename") to ensure uniqueness. +// Note: 'path' must be on the same filesystem as the TempDir for os.Rename to work. +// The caller MUST ensure .Cleanup() is called. +// If the TempDir has been cleaned up, this method will return an error. +func (td *TempDir) Add(path string) error { + if td.tempDirLock == nil { + return fmt.Errorf("temp dir instance not initialized or already cleaned up") + } + fileName := fmt.Sprintf("%d-", td.counter) + filepath.Base(path) + destPath := filepath.Join(td.tempDirPath, fileName) + td.counter++ + return os.Rename(path, destPath) +} + +// Cleanup removes the temporary directory and releases its instance lock. +// After cleanup, the TempDir instance becomes inactive and cannot be reused. +// Subsequent calls to Add() will fail. +// Multiple calls to Cleanup() are safe and will not return an error. +// Callers should typically defer Cleanup() to run after any application-level +// global locks are released to avoid holding those locks during potentially +// slow disk I/O. +func (td *TempDir) Cleanup() error { + if td.tempDirLock == nil { + logrus.Debug("Temp dir already cleaned up") + return nil + } + + if err := os.RemoveAll(td.tempDirPath); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("removing temp dir %s failed: %w", td.tempDirPath, err) + } + + lock := td.tempDirLock + td.tempDirPath = "" + td.tempDirLock = nil + td.tempDirLockPath = "" + return lock.UnlockAndDelete() +} + +// CleanupTemporaryDirectories cleans up multiple temporary directories by calling their cleanup functions. +func CleanupTemporaryDirectories(cleanFuncs ...CleanupTempDirFunc) error { + var cleanupErrors []error + for _, cleanupFunc := range cleanFuncs { + if cleanupFunc == nil { + continue + } + if err := cleanupFunc(); err != nil { + cleanupErrors = append(cleanupErrors, err) + } + } + return errors.Join(cleanupErrors...) +} diff --git a/internal/tempdir/tempdir_test.go b/internal/tempdir/tempdir_test.go new file mode 100644 index 0000000000..b87c17c21b --- /dev/null +++ b/internal/tempdir/tempdir_test.go @@ -0,0 +1,286 @@ +package tempdir + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTempDirAdd(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + defer func() { + assert.NoError(t, td.Cleanup()) + }() + + filePath := filepath.Join(t.TempDir(), "testfile.txt") + err = os.WriteFile(filePath, []byte("test content"), 0o644) + require.NoError(t, err) + + err = td.Add(filePath) + require.NoError(t, err) + + assert.NotEmpty(t, td.tempDirPath) + assert.NotNil(t, td.tempDirLock) + assert.NotEmpty(t, td.tempDirLockPath) + + files, err := os.ReadDir(td.tempDirPath) + require.NoError(t, err) + assert.Len(t, files, 1) + assert.True(t, strings.HasPrefix(files[0].Name(), "0-")) + assert.True(t, strings.HasSuffix(files[0].Name(), "testfile.txt")) + + _, err = os.Stat(filePath) + assert.True(t, os.IsNotExist(err)) +} + +func TestTempDirAddMultipleFiles(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + defer func() { + assert.NoError(t, td.Cleanup()) + }() + + tempDir := t.TempDir() + + for i := 0; i < 3; i++ { + testFile := filepath.Join(tempDir, fmt.Sprintf("testfile%d.txt", i)) + err = os.WriteFile(testFile, []byte(fmt.Sprintf("content %d", i)), 0o644) + require.NoError(t, err) + + err = td.Add(testFile) + require.NoError(t, err) + } + + files, err := os.ReadDir(td.tempDirPath) + require.NoError(t, err) + assert.Len(t, files, 3) + + for i, file := range files { + assert.Equal(t, filepath.Base(file.Name()), fmt.Sprintf("%d-testfile%d.txt", i, i)) + } +} + +func TestTempDirCleanup(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + + testFile := filepath.Join(t.TempDir(), "testfile.txt") + require.NoError(t, os.WriteFile(testFile, []byte("test"), 0o644)) + require.NoError(t, td.Add(testFile)) + + tempDirPath := td.tempDirPath + lockPath := td.tempDirLockPath + + _, err = os.Stat(tempDirPath) + assert.NoError(t, err) + _, err = os.Stat(lockPath) + assert.NoError(t, err) + + require.NoError(t, td.Cleanup()) + + _, err = os.Stat(tempDirPath) + assert.True(t, os.IsNotExist(err)) + _, err = os.Stat(lockPath) + assert.True(t, os.IsNotExist(err)) + + assert.Empty(t, td.tempDirPath) + assert.Nil(t, td.tempDirLock) + assert.Empty(t, td.tempDirLockPath) +} + +func TestTempDirCleanupNotInit(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + + assert.NoError(t, td.Cleanup()) + + assert.NoError(t, td.Cleanup()) +} + +func TestTempDirReInitAfterCleanup(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + + testFile1 := filepath.Join(t.TempDir(), "testfile1.txt") + err = os.WriteFile(testFile1, []byte("test1"), 0o644) + require.NoError(t, err) + + require.NoError(t, td.Add(testFile1)) + + require.NoError(t, td.Cleanup()) + + testFile2 := filepath.Join(t.TempDir(), "testfile2.txt") + require.NoError(t, os.WriteFile(testFile2, []byte("test2"), 0o644)) + require.Error(t, td.Add(testFile2)) + + assert.Empty(t, td.tempDirPath) + assert.Nil(t, td.tempDirLock) +} + +func TestListPotentialStaleDirs(t *testing.T) { + rootDir := t.TempDir() + + expectedIds := map[string]struct{}{} + + for i := 0; i < 3; i++ { + lockfile, err := os.CreateTemp(rootDir, tempdirLockPrefix) + assert.NoError(t, err) + lockfileName := filepath.Base(lockfile.Name()) + lockfile.Close() + id := strings.TrimPrefix(lockfileName, tempdirLockPrefix) + tempDirPath := filepath.Join(rootDir, tempDirPrefix+id) + err = os.MkdirAll(tempDirPath, 0o755) + require.NoError(t, err) + expectedIds[id] = struct{}{} + } + + ids, err := listPotentialStaleDirs(rootDir) + require.NoError(t, err) + assert.Equal(t, expectedIds, ids) +} + +func TestListPotentialStaleDirsNonexistentDir(t *testing.T) { + nonexistentDir := filepath.Join(t.TempDir(), "nonexistent") + + ids, err := listPotentialStaleDirs(nonexistentDir) + assert.NoError(t, err) + assert.Nil(t, ids) +} + +func TestRecoverStaleDirs(t *testing.T) { + rootDir := t.TempDir() + + staleDir := filepath.Join(rootDir, tempDirPrefix+"stale") + staleLock := filepath.Join(rootDir, tempdirLockPrefix+"stale") + + require.NoError(t, os.MkdirAll(staleDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(staleDir, "somefile"), []byte("data"), 0o644)) + require.NoError(t, os.WriteFile(staleLock, []byte{}, 0o644)) + + _, err := os.Stat(staleDir) + assert.NoError(t, err) + _, err = os.Stat(staleLock) + assert.NoError(t, err) + + assert.NoError(t, RecoverStaleDirs(rootDir)) + + _, err = os.Stat(staleDir) + assert.True(t, os.IsNotExist(err)) + _, err = os.Stat(staleLock) + assert.True(t, os.IsNotExist(err)) +} + +func TestRecoverStaleDirsSkipsActiveDirs(t *testing.T) { + rootDir := t.TempDir() + + td, err := NewTempDir(rootDir) + require.NoError(t, err) + + testFile := filepath.Join(t.TempDir(), "testfile.txt") + require.NoError(t, os.WriteFile(testFile, []byte("test"), 0o644)) + require.NoError(t, td.Add(testFile)) + defer func() { + assert.NoError(t, td.Cleanup()) + }() + + activeTempDir := td.tempDirPath + activeLock := td.tempDirLockPath + + staleDir := filepath.Join(rootDir, tempDirPrefix+"stale") + staleLock := filepath.Join(rootDir, tempdirLockPrefix+"stale") + require.NoError(t, os.MkdirAll(staleDir, 0o755)) + require.NoError(t, os.WriteFile(staleLock, []byte{}, 0o644)) + + assert.NoError(t, RecoverStaleDirs(rootDir)) + + _, err = os.Stat(activeTempDir) + assert.NoError(t, err) + _, err = os.Stat(activeLock) + assert.NoError(t, err) + + _, err = os.Stat(staleDir) + assert.True(t, os.IsNotExist(err)) + _, err = os.Stat(staleLock) + assert.True(t, os.IsNotExist(err)) +} + +func TestTempDirMultipleInstances(t *testing.T) { + rootDir := t.TempDir() + + td1, err := NewTempDir(rootDir) + require.NoError(t, err) + defer func() { + assert.NoError(t, td1.Cleanup()) + }() + + td2, err := NewTempDir(rootDir) + require.NoError(t, err) + defer func() { + assert.NoError(t, td2.Cleanup()) + }() + + testFile1 := filepath.Join(t.TempDir(), "testfile1.txt") + require.NoError(t, os.WriteFile(testFile1, []byte("test1"), 0o644)) + require.NoError(t, td1.Add(testFile1)) + + testFile2 := filepath.Join(t.TempDir(), "testfile2.txt") + require.NoError(t, os.WriteFile(testFile2, []byte("test2"), 0o644)) + require.NoError(t, td2.Add(testFile2)) + + assert.NotEqual(t, td1.tempDirPath, td2.tempDirPath) + assert.NotEqual(t, td1.tempDirLockPath, td2.tempDirLockPath) + + _, err = os.Stat(td1.tempDirPath) + assert.NoError(t, err) + _, err = os.Stat(td2.tempDirPath) + assert.NoError(t, err) +} + +func TestTempDirFileNaming(t *testing.T) { + rootDir := t.TempDir() + td, err := NewTempDir(rootDir) + require.NoError(t, err) + defer func() { + assert.NoError(t, td.Cleanup()) + }() + + tempDir := t.TempDir() + + testCases := []string{ + "simple.txt", + "file with spaces.txt", + "file-with-dashes.txt", + "file.with.dots.txt", + } + + for i, filename := range testCases { + testFile := filepath.Join(tempDir, filename) + require.NoError(t, os.WriteFile(testFile, []byte("test"), 0o644)) + + require.NoError(t, td.Add(testFile)) + + files, err := os.ReadDir(td.tempDirPath) + require.NoError(t, err) + + found := false + expectedName := fmt.Sprintf("%d-%s", i, filename) + for _, file := range files { + if file.Name() == expectedName { + found = true + break + } + } + assert.True(t, found, "Expected file %s not found", expectedName) + } +} diff --git a/layers.go b/layers.go index a84706e4c1..0c45f1cdf8 100644 --- a/layers.go +++ b/layers.go @@ -18,6 +18,7 @@ import ( "time" drivers "github.com/containers/storage/drivers" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/pkg/ioutils" @@ -43,6 +44,7 @@ const ( // in readers (which, for implementation reasons, gives other writers the opportunity to create more inconsistent state) // until we just give up. maxLayerStoreCleanupIterations = 3 + tempDirPath = "tmp" ) type layerLocations uint8 @@ -290,8 +292,14 @@ type rwLayerStore interface { // updateNames modifies names associated with a layer based on (op, names). updateNames(id string, names []string, op updateNameOperation) error - // Delete deletes a layer with the specified name or ID. - Delete(id string) error + // deleteWhileHoldingLock deletes a layer with the specified name or ID. + deleteWhileHoldingLock(id string) error + + // deferredDelete deletes a layer with the specified name or ID. + // This removal happen immediately (the layer is no longer usable), + // but physically deleting the files may be deferred. + // Caller MUST call all returned cleanup functions outside of the locks. + deferredDelete(id string) ([]tempdir.CleanupTempDirFunc, error) // Wipe deletes all layers. Wipe() error @@ -794,6 +802,17 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { layers := []*Layer{} ids := make(map[string]*Layer) + if r.lockfile.IsReadWrite() { + if err := tempdir.RecoverStaleDirs(filepath.Join(r.layerdir, tempDirPath)); err != nil { + return false, err + } + if driverTempDirPath := r.driver.GetTempDirRootDir(); driverTempDirPath != "" { + if err := tempdir.RecoverStaleDirs(driverTempDirPath); err != nil { + return false, err + } + } + } + for locationIndex := range numLayerLocationIndex { location := layerLocationFromIndex(locationIndex) rpath := r.jsonPath[locationIndex] @@ -935,7 +954,12 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { // Now actually delete the layers for _, layer := range layersToDelete { logrus.Warnf("Found incomplete layer %q, deleting it", layer.ID) - err := r.deleteInternal(layer.ID) + cleanFunctions, err := r.internalDelete(layer.ID) + defer func() { + if err := tempdir.CleanupTemporaryDirectories(cleanFunctions...); err != nil { + logrus.Errorf("Error cleaning up temporary directories: %v", err) + } + }() if err != nil { // Don't return the error immediately, because deleteInternal does not saveLayers(); // Even if deleting one incomplete layer fails, call saveLayers() so that other possible successfully @@ -1334,7 +1358,7 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID) } if err := r.saveFor(layer); err != nil { - if e := r.Delete(layer.ID); e != nil { + if e := r.deleteWhileHoldingLock(layer.ID); e != nil { logrus.Errorf("While recovering from a failure to save layers, error deleting layer %#v: %v", id, e) } return nil, err @@ -1469,7 +1493,7 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount if cleanupFailureContext == "" { cleanupFailureContext = "unknown: cleanupFailureContext not set at the failure site" } - if e := r.Delete(id); e != nil { + if e := r.deleteWhileHoldingLock(id); e != nil { logrus.Errorf("While recovering from a failure (%s), error deleting layer %#v: %v", cleanupFailureContext, id, e) } } @@ -1920,13 +1944,15 @@ func layerHasIncompleteFlag(layer *Layer) bool { } // Requires startWriting. -func (r *layerStore) deleteInternal(id string) error { +// Caller MUST run all returned cleanup functions after this. Ideally outside of +// the startWriting. +func (r *layerStore) internalDelete(id string) ([]tempdir.CleanupTempDirFunc, error) { if !r.lockfile.IsReadWrite() { - return fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) + return nil, fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) } layer, ok := r.lookup(id) if !ok { - return ErrLayerUnknown + return nil, ErrLayerUnknown } // Ensure that if we are interrupted, the layer will be cleaned up. if !layerHasIncompleteFlag(layer) { @@ -1935,16 +1961,29 @@ func (r *layerStore) deleteInternal(id string) error { } layer.Flags[incompleteFlag] = true if err := r.saveFor(layer); err != nil { - return err + return nil, err } } + tempDirectory, err := tempdir.NewTempDir(filepath.Join(r.layerdir, tempDirPath)) + if err != nil { + return nil, err + } // We never unset incompleteFlag; below, we remove the entire object from r.layers. id = layer.ID - if err := r.driver.Remove(id); err != nil && !errors.Is(err, os.ErrNotExist) { - return err + cleanFunctions := []tempdir.CleanupTempDirFunc{} + cleanFunc, err := r.driver.DeferredRemove(id) + cleanFunctions = append(cleanFunctions, cleanFunc) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return cleanFunctions, err + } + + cleanFunctions = append(cleanFunctions, tempDirectory.Cleanup) + if err := tempDirectory.Add(r.tspath(id)); err != nil && !errors.Is(err, os.ErrNotExist) { + return cleanFunctions, err + } + if err := tempDirectory.Add(r.datadir(id)); err != nil && !errors.Is(err, os.ErrNotExist) { + return cleanFunctions, err } - os.Remove(r.tspath(id)) - os.RemoveAll(r.datadir(id)) delete(r.byid, id) for _, name := range layer.Names { delete(r.byname, name) @@ -1968,7 +2007,7 @@ func (r *layerStore) deleteInternal(id string) error { }) { selinux.ReleaseLabel(mountLabel) } - return nil + return cleanFunctions, nil } // Requires startWriting. @@ -1988,10 +2027,19 @@ func (r *layerStore) deleteInDigestMap(id string) { } // Requires startWriting. -func (r *layerStore) Delete(id string) error { +func (r *layerStore) deleteWhileHoldingLock(id string) error { + cleanupFunctions, deferErr := r.deferredDelete(id) + cleanupErr := tempdir.CleanupTemporaryDirectories(cleanupFunctions...) + return errors.Join(deferErr, cleanupErr) +} + +// Requires startWriting. +// Caller MUST run all returned cleanup functions after this. Ideally outside of +// the startWriting. +func (r *layerStore) deferredDelete(id string) ([]tempdir.CleanupTempDirFunc, error) { layer, ok := r.lookup(id) if !ok { - return ErrLayerUnknown + return nil, ErrLayerUnknown } id = layer.ID // The layer may already have been explicitly unmounted, but if not, we @@ -2003,13 +2051,14 @@ func (r *layerStore) Delete(id string) error { break } if err != nil { - return err + return nil, err } } - if err := r.deleteInternal(id); err != nil { - return err + cleanFunctions, err := r.internalDelete(id) + if err != nil { + return cleanFunctions, err } - return r.saveFor(layer) + return cleanFunctions, r.saveFor(layer) } // Requires startReading or startWriting. @@ -2039,7 +2088,7 @@ func (r *layerStore) Wipe() error { return r.byid[ids[i]].Created.After(r.byid[ids[j]].Created) }) for _, id := range ids { - if err := r.Delete(id); err != nil { + if err := r.deleteWhileHoldingLock(id); err != nil { return err } } @@ -2571,7 +2620,7 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver } for k, v := range diffOutput.BigData { if err := r.SetBigData(id, k, bytes.NewReader(v)); err != nil { - if err2 := r.Delete(id); err2 != nil { + if err2 := r.deleteWhileHoldingLock(id); err2 != nil { logrus.Errorf("While recovering from a failure to set big data, error deleting layer %#v: %v", id, err2) } return err diff --git a/store.go b/store.go index 073a766f89..480f74b76d 100644 --- a/store.go +++ b/store.go @@ -22,6 +22,7 @@ import ( drivers "github.com/containers/storage/drivers" "github.com/containers/storage/internal/dedup" + "github.com/containers/storage/internal/tempdir" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/directory" "github.com/containers/storage/pkg/idtools" @@ -1758,7 +1759,7 @@ func (s *store) imageTopLayerForMapping(image *Image, ristore roImageStore, rlst } // By construction, createMappedLayer can only be true if ristore == s.imageStore. if err = s.imageStore.addMappedTopLayer(image.ID, mappedLayer.ID); err != nil { - if err2 := rlstore.Delete(mappedLayer.ID); err2 != nil { + if err2 := rlstore.deleteWhileHoldingLock(mappedLayer.ID); err2 != nil { err = fmt.Errorf("deleting layer %q: %v: %w", mappedLayer.ID, err2, err) } return nil, fmt.Errorf("registering ID-mapped layer with image %q: %w", image.ID, err) @@ -1943,7 +1944,7 @@ func (s *store) CreateContainer(id string, names []string, image, layer, metadat } container, err := s.containerStore.create(id, names, imageID, layer, &options) if err != nil || container == nil { - if err2 := rlstore.Delete(layer); err2 != nil { + if err2 := rlstore.deleteWhileHoldingLock(layer); err2 != nil { if err == nil { err = fmt.Errorf("deleting layer %#v: %w", layer, err2) } else { @@ -2540,7 +2541,13 @@ func (s *store) Lookup(name string) (string, error) { return "", ErrLayerUnknown } -func (s *store) DeleteLayer(id string) error { +func (s *store) DeleteLayer(id string) (err error) { + cleanupFunctions := []tempdir.CleanupTempDirFunc{} + defer func() { + if cleanupErr := tempdir.CleanupTemporaryDirectories(cleanupFunctions...); cleanupErr != nil { + err = errors.Join(cleanupErr, err) + } + }() return s.writeToAllStores(func(rlstore rwLayerStore) error { if rlstore.Exists(id) { if l, err := rlstore.Get(id); err != nil { @@ -2574,7 +2581,9 @@ func (s *store) DeleteLayer(id string) error { return fmt.Errorf("layer %v used by container %v: %w", id, container.ID, ErrLayerUsedByContainer) } } - if err := rlstore.Delete(id); err != nil { + cf, err := rlstore.deferredDelete(id) + cleanupFunctions = append(cleanupFunctions, cf...) + if err != nil { return fmt.Errorf("delete layer %v: %w", id, err) } @@ -2593,6 +2602,12 @@ func (s *store) DeleteLayer(id string) error { func (s *store) DeleteImage(id string, commit bool) (layers []string, err error) { layersToRemove := []string{} + cleanupFunctions := []tempdir.CleanupTempDirFunc{} + defer func() { + if cleanupErr := tempdir.CleanupTemporaryDirectories(cleanupFunctions...); cleanupErr != nil { + err = errors.Join(cleanupErr, err) + } + }() if err := s.writeToAllStores(func(rlstore rwLayerStore) error { // Delete image from all available imagestores configured to be used. imageFound := false @@ -2698,7 +2713,9 @@ func (s *store) DeleteImage(id string, commit bool) (layers []string, err error) } if commit { for _, layer := range layersToRemove { - if err = rlstore.Delete(layer); err != nil { + cf, err := rlstore.deferredDelete(layer) + cleanupFunctions = append(cleanupFunctions, cf...) + if err != nil { return err } } @@ -2710,7 +2727,13 @@ func (s *store) DeleteImage(id string, commit bool) (layers []string, err error) return layersToRemove, nil } -func (s *store) DeleteContainer(id string) error { +func (s *store) DeleteContainer(id string) (err error) { + cleanupFunctions := []tempdir.CleanupTempDirFunc{} + defer func() { + if cleanupErr := tempdir.CleanupTemporaryDirectories(cleanupFunctions...); cleanupErr != nil { + err = errors.Join(cleanupErr, err) + } + }() return s.writeToAllStores(func(rlstore rwLayerStore) error { if !s.containerStore.Exists(id) { return ErrNotAContainer @@ -2726,7 +2749,9 @@ func (s *store) DeleteContainer(id string) error { // the container record that refers to it, effectively losing // track of it if rlstore.Exists(container.LayerID) { - if err := rlstore.Delete(container.LayerID); err != nil { + cf, err := rlstore.deferredDelete(container.LayerID) + cleanupFunctions = append(cleanupFunctions, cf...) + if err != nil { return err } } @@ -2752,12 +2777,20 @@ func (s *store) DeleteContainer(id string) error { }) } -func (s *store) Delete(id string) error { +func (s *store) Delete(id string) (err error) { + cleanupFunctions := []tempdir.CleanupTempDirFunc{} + defer func() { + if cleanupErr := tempdir.CleanupTemporaryDirectories(cleanupFunctions...); cleanupErr != nil { + err = errors.Join(cleanupErr, err) + } + }() return s.writeToAllStores(func(rlstore rwLayerStore) error { if s.containerStore.Exists(id) { if container, err := s.containerStore.Get(id); err == nil { if rlstore.Exists(container.LayerID) { - if err = rlstore.Delete(container.LayerID); err != nil { + cf, err := rlstore.deferredDelete(container.LayerID) + cleanupFunctions = append(cleanupFunctions, cf...) + if err != nil { return err } if err = s.containerStore.Delete(id); err != nil { @@ -2781,7 +2814,9 @@ func (s *store) Delete(id string) error { return s.imageStore.Delete(id) } if rlstore.Exists(id) { - return rlstore.Delete(id) + cf, err := rlstore.deferredDelete(id) + cleanupFunctions = append(cleanupFunctions, cf...) + return err } return ErrLayerUnknown }) diff --git a/store_test.go b/store_test.go index d8a1ad2387..df1342e53d 100644 --- a/store_test.go +++ b/store_test.go @@ -569,3 +569,63 @@ func TestStoreMultiList(t *testing.T) { store.Free() } + +func TestStoreDelete(t *testing.T) { + reexec.Init() + + store := newTestStore(t, StoreOptions{}) + + options := MultiListOptions{ + Layers: true, + Images: true, + Containers: true, + } + + expectedResult, err := store.MultiList(options) + require.NoError(t, err) + + _, err = store.CreateLayer("LayerNoUsed", "", []string{"not-used"}, "", false, nil) + require.NoError(t, err) + + _, err = store.CreateLayer("Layer", "", []string{"l1"}, "", false, nil) + require.NoError(t, err) + + _, err = store.CreateImage("Image1", []string{"i1"}, "Layer", "", nil) + require.NoError(t, err) + + _, err = store.CreateImage("Image", []string{"i"}, "Layer", "", nil) + require.NoError(t, err) + + _, err = store.CreateContainer("Container", []string{"c"}, "Image", "", "", nil) + require.NoError(t, err) + + _, err = store.CreateContainer("Container1", []string{"c1"}, "Image1", "", "", nil) + require.NoError(t, err) + + err = store.DeleteContainer("Container") + require.NoError(t, err) + + _, err = store.DeleteImage("Image", true) + require.NoError(t, err) + + err = store.DeleteContainer("Container1") + require.NoError(t, err) + + _, err = store.DeleteImage("Image1", true) + require.NoError(t, err) + + err = store.DeleteLayer("LayerNoUsed") + require.NoError(t, err) + + listResults, err := store.MultiList(options) + require.NoError(t, err) + + require.Equal(t, expectedResult.Layers, listResults.Layers) + require.Equal(t, expectedResult.Containers, listResults.Containers) + require.Equal(t, expectedResult.Images, listResults.Images) + + _, err = store.Shutdown(true) + require.Nil(t, err) + + store.Free() +} diff --git a/userns.go b/userns.go index 9cfd6ea347..117a732ce0 100644 --- a/userns.go +++ b/userns.go @@ -202,7 +202,7 @@ outer: return 0, err } defer func() { - if err2 := rlstore.Delete(clayer.ID); err2 != nil { + if err2 := rlstore.deleteWhileHoldingLock(clayer.ID); err2 != nil { if retErr == nil { retErr = fmt.Errorf("deleting temporary layer %#v: %w", clayer.ID, err2) } else {