Commit fe88d06

db: fix obsolete file metric underflows
Previously, a race existed in the calculation of obsolete file metrics, resulting in underflow. The values within versionSet.metrics were reset and recalculated to reflect the set of files in versionSet.obsoleteTables whenever new files were added to obsoleteFiles. Additionally, the cleanup manager invoked a callback to decrease versionSet.metrics whenever a table was deleted. The recalculation of versionSet.metrics could reset the metrics to less than the sum of outstanding pending deletes. When the cleanup manager eventually deleted the pending tables, these metrics would underflow.

This commit fixes the bug by maintaining separate stats for all files that have been enqueued for the cleanup manager and all files that have been successfully deleted. The volume of outstanding, pending deletions is the difference between the two. For now, there's an additional wart: the set of files sitting in versionSet.obsoleteTables is still tracked separately.

Informs #4811.
1 parent: c4b2dad
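
To make the accounting concrete, here is a minimal, standalone Go sketch (not an excerpt from the diff; the countAndSize name mirrors the helper type added to obsolete_files.go below, and the numbers are hypothetical). The queued and completed totals only ever grow, and the pending backlog reported by DB.Metrics() is their difference, so a concurrent recalculation can no longer drive the metric below zero.

package main

import "fmt"

// countAndSize mirrors the helper type introduced in this commit; the
// values below are made up and only illustrate the accounting.
type countAndSize struct {
	count uint64
	size  uint64
}

func main() {
	// Cumulative stats, both monotonically increasing over the DB's lifetime.
	queued := countAndSize{count: 2, size: 30}    // tables handed to the cleanup manager
	completed := countAndSize{count: 1, size: 10} // tables the cleanup manager has deleted

	// The pending backlog is the difference; because neither input is ever
	// reset or decremented, the result cannot underflow.
	pending := countAndSize{
		count: queued.count - completed.count,
		size:  queued.size - completed.size,
	}
	fmt.Printf("pending obsolete tables: %d (%d bytes)\n", pending.count, pending.size)
}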

File tree

5 files changed: +141 -37 lines
db.go

Lines changed: 29 additions & 4 deletions
@@ -459,10 +459,16 @@ type DB struct {
 			noOngoingFlushStartTime time.Time
 		}

-		// Non-zero when file cleaning is disabled. The disabled count acts as a
-		// reference count to prohibit file cleaning. See
-		// DB.{disable,Enable}FileDeletions().
-		disableFileDeletions int
+		fileDeletions struct {
+			// Non-zero when file cleaning is disabled. The disabled count acts
+			// as a reference count to prohibit file cleaning. See
+			// DB.{disable,enable}FileDeletions().
+			disableCount int
+			// queuedStats holds cumulative stats for tables that have been
+			// queued for deletion by the cleanup manager. These stats are
+			// monotonically increasing for the *DB's lifetime.
+			queuedStats obsoleteTableStats
+		}

 		snapshots struct {
 			// The list of active snapshots.
@@ -1955,6 +1961,7 @@ func (d *DB) AsyncFlush() (<-chan struct{}, error) {
 func (d *DB) Metrics() *Metrics {
 	metrics := &Metrics{}
 	walStats := d.mu.log.manager.Stats()
+	completedCleanupStats := d.cleanupManager.CompletedStats()

 	d.mu.Lock()
 	vers := d.mu.versions.currentVersion()
@@ -2015,6 +2022,24 @@ func (d *DB) Metrics() *Metrics {
 		}
 	}
 	metrics.private.optionsFileSize = d.optionsFileSize
+	// The obsolete table metrics have a subtle calculation:
+	//
+	// (A) The vs.metrics.Table.[Local.]ObsoleteSize fields reflect the set of
+	// files currently sitting in vs.obsoleteTables but not yet enqueued to the
+	// cleanup manager.
+	//
+	// (B) The d.mu.fileDeletions.queuedStats field holds the set of files that
+	// have been queued for deletion by the cleanup manager.
+	//
+	// (C) The cleanup manager also maintains cumulative stats for the set of
+	// files that have been deleted.
+	//
+	// The value of currently pending obsolete files is (A) + (B) - (C).
+	pendingObsoleteFileStats := d.mu.fileDeletions.queuedStats
+	pendingObsoleteFileStats.Sub(completedCleanupStats)
+	metrics.Table.Local.ObsoleteSize += pendingObsoleteFileStats.local.size
+	metrics.Table.ObsoleteCount += int64(pendingObsoleteFileStats.total.count)
+	metrics.Table.ObsoleteSize += pendingObsoleteFileStats.total.size

 	// TODO(jackson): Consider making these metrics optional.
 	metrics.Keys.RangeKeySetsCount = *rangeKeySetsAnnotator.MultiLevelAnnotation(vers.RangeKeyLevels[:])

internal/invariants/off.go

Lines changed: 14 additions & 0 deletions
@@ -42,3 +42,17 @@ func (*Value[V]) Get() V {

 // Store stores the value.
 func (*Value[V]) Store(v V) {}
+
+// SafeSub returns a - b. If a < b, it panics in invariant builds and returns 0
+// in non-invariant builds.
+func SafeSub[T Integer](a, b T) T {
+	if a < b {
+		return 0
+	}
+	return a - b
+}
+
+// Integer is a constraint that permits any integer type.
+type Integer interface {
+	~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
+}

internal/invariants/on.go

Lines changed: 16 additions & 0 deletions
@@ -7,6 +7,8 @@

 package invariants

+import "fmt"
+
 // Enabled is true if we were built with the "invariants" or "race" build tags.
 const Enabled = true

@@ -58,3 +60,17 @@ func (v *Value[V]) Get() V {
 func (v *Value[V]) Store(inner V) {
 	v.v = inner
 }
+
+// SafeSub returns a - b. If a < b, it panics in invariant builds and returns 0
+// in non-invariant builds.
+func SafeSub[T Integer](a, b T) T {
+	if a < b {
+		panic(fmt.Sprintf("underflow: %d - %d", a, b))
+	}
+	return a - b
+}
+
+// Integer is a constraint that permits any integer type.
+type Integer interface {
+	~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
+}
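
The off.go and on.go variants above give the same SafeSub call site two different failure modes. A small standalone sketch (local copies of the two functions, renamed so both can run side by side; the inputs are hypothetical) of how each behaves on an underflowing subtraction:

package main

import "fmt"

// safeSubOff mirrors internal/invariants/off.go: clamp underflow to zero.
func safeSubOff(a, b uint64) uint64 {
	if a < b {
		return 0
	}
	return a - b
}

// safeSubOn mirrors internal/invariants/on.go: panic on underflow so the
// accounting bug is caught in invariant/race builds rather than silently clamped.
func safeSubOn(a, b uint64) uint64 {
	if a < b {
		panic(fmt.Sprintf("underflow: %d - %d", a, b))
	}
	return a - b
}

func main() {
	fmt.Println(safeSubOff(3, 10)) // prints 0: production builds degrade gracefully

	defer func() {
		fmt.Println("recovered:", recover()) // prints: recovered: underflow: 3 - 10
	}()
	safeSubOn(3, 10) // invariant builds panic, surfacing the bug
}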

obsolete_files.go

Lines changed: 81 additions & 32 deletions
@@ -14,6 +14,7 @@ import (

 	"github.com/cockroachdb/errors/oserror"
 	"github.com/cockroachdb/pebble/v2/internal/base"
+	"github.com/cockroachdb/pebble/v2/internal/invariants"
 	"github.com/cockroachdb/pebble/v2/objstorage"
 	"github.com/cockroachdb/pebble/v2/vfs"
 	"github.com/cockroachdb/pebble/v2/wal"
@@ -30,10 +31,9 @@ type DeleteCleaner = base.DeleteCleaner
 type ArchiveCleaner = base.ArchiveCleaner

 type cleanupManager struct {
-	opts            *Options
-	objProvider     objstorage.Provider
-	onTableDeleteFn func(fileSize uint64, isLocal bool)
-	deletePacer     *deletionPacer
+	opts        *Options
+	objProvider objstorage.Provider
+	deletePacer *deletionPacer

 	// jobsCh is used as the cleanup job queue.
 	jobsCh chan *cleanupJob
@@ -44,6 +44,7 @@ type cleanupManager struct {
 		sync.Mutex
 		// totalJobs is the total number of enqueued jobs (completed or in progress).
 		totalJobs int
+		completedStats obsoleteTableStats
 		completedJobs int
 		completedJobsCond sync.Cond
 		jobsQueueWarningIssued bool
@@ -73,22 +74,19 @@ type obsoleteFile struct {
 type cleanupJob struct {
 	jobID         JobID
 	obsoleteFiles []obsoleteFile
+	tableStats    obsoleteTableStats
 }

 // openCleanupManager creates a cleanupManager and starts its background goroutine.
 // The cleanupManager must be Close()d.
 func openCleanupManager(
-	opts *Options,
-	objProvider objstorage.Provider,
-	onTableDeleteFn func(fileSize uint64, isLocal bool),
-	getDeletePacerInfo func() deletionPacerInfo,
+	opts *Options, objProvider objstorage.Provider, getDeletePacerInfo func() deletionPacerInfo,
 ) *cleanupManager {
 	cm := &cleanupManager{
-		opts:            opts,
-		objProvider:     objProvider,
-		onTableDeleteFn: onTableDeleteFn,
-		deletePacer:     newDeletionPacer(time.Now(), int64(opts.TargetByteDeletionRate), getDeletePacerInfo),
-		jobsCh:          make(chan *cleanupJob, jobsQueueDepth),
+		opts:        opts,
+		objProvider: objProvider,
+		deletePacer: newDeletionPacer(time.Now(), int64(opts.TargetByteDeletionRate), getDeletePacerInfo),
+		jobsCh:      make(chan *cleanupJob, jobsQueueDepth),
 	}
 	cm.mu.completedJobsCond.L = &cm.mu.Mutex
 	cm.waitGroup.Add(1)
@@ -102,6 +100,14 @@ func openCleanupManager(
 	return cm
 }

+// CompletedStats returns the stats summarizing tables deleted. The returned
+// stats increase monotonically over the lifetime of the DB.
+func (cm *cleanupManager) CompletedStats() obsoleteTableStats {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+	return cm.mu.completedStats
+}
+
 // Close stops the background goroutine, waiting until all queued jobs are completed.
 // Delete pacing is disabled for the remaining jobs.
 func (cm *cleanupManager) Close() {
@@ -110,10 +116,13 @@ func (cm *cleanupManager) Close() {
 }

 // EnqueueJob adds a cleanup job to the manager's queue.
-func (cm *cleanupManager) EnqueueJob(jobID JobID, obsoleteFiles []obsoleteFile) {
+func (cm *cleanupManager) EnqueueJob(
+	jobID JobID, obsoleteFiles []obsoleteFile, tableStats obsoleteTableStats,
+) {
 	job := &cleanupJob{
 		jobID:         jobID,
 		obsoleteFiles: obsoleteFiles,
+		tableStats:    tableStats,
 	}

 	// Report deleted bytes to the pacer, which can use this data to potentially
@@ -165,7 +174,6 @@ func (cm *cleanupManager) mainLoop() {
 			switch of.fileType {
 			case fileTypeTable:
 				cm.maybePace(&tb, of.fileType, of.nonLogFile.fileNum, of.nonLogFile.fileSize)
-				cm.onTableDeleteFn(of.nonLogFile.fileSize, of.nonLogFile.isLocal)
 				cm.deleteObsoleteObject(fileTypeTable, job.jobID, of.nonLogFile.fileNum)
 			case fileTypeLog:
 				cm.deleteObsoleteFile(of.logFile.FS, fileTypeLog, job.jobID, of.logFile.Path,
@@ -177,6 +185,7 @@
 			}
 		}
 		cm.mu.Lock()
+		cm.mu.completedStats.Add(job.tableStats)
 		cm.mu.completedJobs++
 		cm.mu.completedJobsCond.Broadcast()
 		cm.maybeLogLocked()
@@ -322,17 +331,6 @@ func (d *DB) getDeletionPacerInfo() deletionPacerInfo {
 	return pacerInfo
 }

-// onObsoleteTableDelete is called to update metrics when an sstable is deleted.
-func (d *DB) onObsoleteTableDelete(fileSize uint64, isLocal bool) {
-	d.mu.Lock()
-	d.mu.versions.metrics.Table.ObsoleteCount--
-	d.mu.versions.metrics.Table.ObsoleteSize -= fileSize
-	if isLocal {
-		d.mu.versions.metrics.Table.Local.ObsoleteSize -= fileSize
-	}
-	d.mu.Unlock()
-}
-
 // scanObsoleteFiles scans the filesystem for files that are no longer needed
 // and adds those to the internal lists of obsolete files. Note that the files
 // are not actually deleted by this method. A subsequent call to
@@ -439,7 +437,7 @@ func (d *DB) scanObsoleteFiles(list []string, flushableIngests []*ingestedFlusha
 //
 // d.mu must be held when calling this method.
 func (d *DB) disableFileDeletions() {
-	d.mu.disableFileDeletions++
+	d.mu.fileDeletions.disableCount++
 	d.mu.Unlock()
 	defer d.mu.Lock()
 	d.cleanupManager.Wait()
@@ -450,11 +448,11 @@
 //
 // d.mu must be held when calling this method.
 func (d *DB) enableFileDeletions() {
-	if d.mu.disableFileDeletions <= 0 {
+	if d.mu.fileDeletions.disableCount <= 0 {
 		panic("pebble: file deletion disablement invariant violated")
 	}
-	d.mu.disableFileDeletions--
-	if d.mu.disableFileDeletions > 0 {
+	d.mu.fileDeletions.disableCount--
+	if d.mu.fileDeletions.disableCount > 0 {
 		return
 	}
 	d.deleteObsoleteFiles(d.newJobIDLocked())
@@ -469,7 +467,7 @@ type fileInfo = base.FileInfo
 // Does nothing if file deletions are disabled (see disableFileDeletions). A
 // cleanup job will be scheduled when file deletions are re-enabled.
 func (d *DB) deleteObsoleteFiles(jobID JobID) {
-	if d.mu.disableFileDeletions > 0 {
+	if d.mu.fileDeletions.disableCount > 0 {
 		return
 	}
 	_, noRecycle := d.opts.Cleaner.(base.NeedsFileContents)
@@ -507,6 +505,14 @@ func (d *DB) deleteObsoleteFiles(jobID JobID) {
 	obsoleteOptions := d.mu.versions.obsoleteOptions
 	d.mu.versions.obsoleteOptions = nil

+	// Compute the stats for the tables being queued for deletion and add them
+	// to the running total. These stats will be used during DB.Metrics() to
+	// calculate the count and size of pending obsolete tables by diffing these
+	// stats and the stats reported by the cleanup manager.
+	tableStats := calculateObsoleteTableStats(obsoleteTables)
+	d.mu.fileDeletions.queuedStats.Add(tableStats)
+	d.mu.versions.updateObsoleteTableMetricsLocked()
+
 	// Release d.mu while preparing the cleanup job and possibly waiting.
 	// Note the unusual order: Unlock and then Lock.
 	d.mu.Unlock()
@@ -560,7 +566,7 @@ func (d *DB) deleteObsoleteFiles(jobID JobID) {
 		}
 	}
 	if len(filesToDelete) > 0 {
-		d.cleanupManager.EnqueueJob(jobID, filesToDelete)
+		d.cleanupManager.EnqueueJob(jobID, filesToDelete, tableStats)
 	}
 	if d.opts.private.testingAlwaysWaitForCleanup {
 		d.cleanupManager.Wait()
@@ -579,6 +585,49 @@ func (d *DB) maybeScheduleObsoleteTableDeletionLocked() {
 	}
 }

+func calculateObsoleteTableStats(objects []tableInfo) obsoleteTableStats {
+	var stats obsoleteTableStats
+	for _, o := range objects {
+		if o.isLocal {
+			stats.local.count++
+			stats.local.size += o.FileSize
+		}
+		stats.total.count++
+		stats.total.size += o.FileSize
+	}
+	return stats
+}
+
+type obsoleteTableStats struct {
+	local countAndSize
+	total countAndSize
+}
+
+func (s *obsoleteTableStats) Add(other obsoleteTableStats) {
+	s.local.Add(other.local)
+	s.total.Add(other.total)
+}
+
+func (s *obsoleteTableStats) Sub(other obsoleteTableStats) {
+	s.local.Sub(other.local)
+	s.total.Sub(other.total)
+}
+
+type countAndSize struct {
+	count uint64
+	size  uint64
+}
+
+func (c *countAndSize) Add(other countAndSize) {
+	c.count += other.count
+	c.size += other.size
+}
+
+func (c *countAndSize) Sub(other countAndSize) {
+	c.count = invariants.SafeSub(c.count, other.count)
+	c.size = invariants.SafeSub(c.size, other.size)
+}
+
 func merge(a, b []fileInfo) []fileInfo {
 	if len(b) == 0 {
 		return a

open.go

Lines changed: 1 addition & 1 deletion
@@ -397,7 +397,7 @@ func Open(dirname string, opts *Options) (db *DB, err error) {

 	d.mu.log.manager = walManager

-	d.cleanupManager = openCleanupManager(opts, d.objProvider, d.onObsoleteTableDelete, d.getDeletionPacerInfo)
+	d.cleanupManager = openCleanupManager(opts, d.objProvider, d.getDeletionPacerInfo)

 	if manifestExists && !opts.DisableConsistencyCheck {
 		curVersion := d.mu.versions.currentVersion()
