Skip to content

Commit a2fb086

Browse files
corradocmumford
authored and committed
Add option for max file size. The current hard-coded value of 2M is inefficient in colossus.
------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=134391640
1 parent 3080a45 commit a2fb086

File tree

6 files changed

+79
-32
lines changed

6 files changed

+79
-32
lines changed

db/db_bench.cc

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,14 @@ static bool FLAGS_histogram = false;
8484
// (initialized to default value by "main")
8585
static int FLAGS_write_buffer_size = 0;
8686

87+
// Number of bytes written to each file.
88+
// (initialized to default value by "main")
89+
static int FLAGS_max_file_size = 0;
90+
91+
// Approximate size of user data packed per block (before compression).
92+
// (initialized to default value by "main")
93+
static int FLAGS_block_size = 0;
94+
8795
// Number of bytes to use as a cache of uncompressed data.
8896
// Negative means use default settings.
8997
static int FLAGS_cache_size = -1;
@@ -109,6 +117,7 @@ static const char* FLAGS_db = NULL;
109117
namespace leveldb {
110118

111119
namespace {
120+
leveldb::Env* g_env = NULL;
112121

113122
// Helper for quickly generating random data.
114123
class RandomGenerator {
@@ -186,7 +195,7 @@ class Stats {
186195
done_ = 0;
187196
bytes_ = 0;
188197
seconds_ = 0;
189-
start_ = Env::Default()->NowMicros();
198+
start_ = g_env->NowMicros();
190199
finish_ = start_;
191200
message_.clear();
192201
}
@@ -204,7 +213,7 @@ class Stats {
204213
}
205214

206215
void Stop() {
207-
finish_ = Env::Default()->NowMicros();
216+
finish_ = g_env->NowMicros();
208217
seconds_ = (finish_ - start_) * 1e-6;
209218
}
210219

@@ -214,7 +223,7 @@ class Stats {
214223

215224
void FinishedSingleOp() {
216225
if (FLAGS_histogram) {
217-
double now = Env::Default()->NowMicros();
226+
double now = g_env->NowMicros();
218227
double micros = now - last_op_finish_;
219228
hist_.Add(micros);
220229
if (micros > 20000) {
@@ -404,10 +413,10 @@ class Benchmark {
404413
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
405414
heap_counter_(0) {
406415
std::vector<std::string> files;
407-
Env::Default()->GetChildren(FLAGS_db, &files);
416+
g_env->GetChildren(FLAGS_db, &files);
408417
for (size_t i = 0; i < files.size(); i++) {
409418
if (Slice(files[i]).starts_with("heap-")) {
410-
Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
419+
g_env->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
411420
}
412421
}
413422
if (!FLAGS_use_existing_db) {
@@ -589,7 +598,7 @@ class Benchmark {
589598
arg[i].shared = &shared;
590599
arg[i].thread = new ThreadState(i);
591600
arg[i].thread->shared = &shared;
592-
Env::Default()->StartThread(ThreadBody, &arg[i]);
601+
g_env->StartThread(ThreadBody, &arg[i]);
593602
}
594603

595604
shared.mu.Lock();
@@ -700,9 +709,12 @@ class Benchmark {
700709
void Open() {
701710
assert(db_ == NULL);
702711
Options options;
712+
options.env = g_env;
703713
options.create_if_missing = !FLAGS_use_existing_db;
704714
options.block_cache = cache_;
705715
options.write_buffer_size = FLAGS_write_buffer_size;
716+
options.max_file_size = FLAGS_max_file_size;
717+
options.block_size = FLAGS_block_size;
706718
options.max_open_files = FLAGS_open_files;
707719
options.filter_policy = filter_policy_;
708720
options.reuse_logs = FLAGS_reuse_logs;
@@ -925,7 +937,7 @@ class Benchmark {
925937
char fname[100];
926938
snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
927939
WritableFile* file;
928-
Status s = Env::Default()->NewWritableFile(fname, &file);
940+
Status s = g_env->NewWritableFile(fname, &file);
929941
if (!s.ok()) {
930942
fprintf(stderr, "%s\n", s.ToString().c_str());
931943
return;
@@ -934,7 +946,7 @@ class Benchmark {
934946
delete file;
935947
if (!ok) {
936948
fprintf(stderr, "heap profiling not supported\n");
937-
Env::Default()->DeleteFile(fname);
949+
g_env->DeleteFile(fname);
938950
}
939951
}
940952
};
@@ -943,6 +955,8 @@ class Benchmark {
943955

944956
int main(int argc, char** argv) {
945957
FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
958+
FLAGS_max_file_size = leveldb::Options().max_file_size;
959+
FLAGS_block_size = leveldb::Options().block_size;
946960
FLAGS_open_files = leveldb::Options().max_open_files;
947961
std::string default_db_path;
948962

@@ -973,6 +987,10 @@ int main(int argc, char** argv) {
973987
FLAGS_value_size = n;
974988
} else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
975989
FLAGS_write_buffer_size = n;
990+
} else if (sscanf(argv[i], "--max_file_size=%d%c", &n, &junk) == 1) {
991+
FLAGS_max_file_size = n;
992+
} else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
993+
FLAGS_block_size = n;
976994
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
977995
FLAGS_cache_size = n;
978996
} else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
@@ -987,9 +1005,11 @@ int main(int argc, char** argv) {
9871005
}
9881006
}
9891007

1008+
leveldb::g_env = leveldb::Env::Default();
1009+
9901010
// Choose a location for the test database if none given with --db=<path>
9911011
if (FLAGS_db == NULL) {
992-
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
1012+
leveldb::g_env->GetTestDirectory(&default_db_path);
9931013
default_db_path += "/dbbench";
9941014
FLAGS_db = default_db_path.c_str();
9951015
}

db/db_impl.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ Options SanitizeOptions(const std::string& dbname,
9696
result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
9797
ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000);
9898
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
99+
ClipToRange(&result.max_file_size, 1<<20, 1<<30);
99100
ClipToRange(&result.block_size, 1<<10, 4<<20);
100101
if (result.info_log == NULL) {
101102
// Open a log file in the same directory as the db

db/version_set.cc

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,39 @@
2020

2121
namespace leveldb {
2222

23-
static const int kTargetFileSize = 2 * 1048576;
23+
static int TargetFileSize(const Options* options) {
24+
return options->max_file_size;
25+
}
2426

2527
// Maximum bytes of overlaps in grandparent (i.e., level+2) before we
2628
// stop building a single file in a level->level+1 compaction.
27-
static const int64_t kMaxGrandParentOverlapBytes = 10 * kTargetFileSize;
29+
static int64_t MaxGrandParentOverlapBytes(const Options* options) {
30+
return 10 * TargetFileSize(options);
31+
}
2832

2933
// Maximum number of bytes in all compacted files. We avoid expanding
3034
// the lower level file set of a compaction if it would make the
3135
// total compaction cover more than this many bytes.
32-
static const int64_t kExpandedCompactionByteSizeLimit = 25 * kTargetFileSize;
36+
static int64_t ExpandedCompactionByteSizeLimit(const Options* options) {
37+
return 25 * TargetFileSize(options);
38+
}
3339

34-
static double MaxBytesForLevel(int level) {
40+
static double MaxBytesForLevel(const Options* options, int level) {
3541
// Note: the result for level zero is not really used since we set
3642
// the level-0 compaction threshold based on number of files.
37-
double result = 10 * 1048576.0; // Result for both level-0 and level-1
43+
44+
// Result for both level-0 and level-1
45+
double result = 10. * 1048576.0;
3846
while (level > 1) {
3947
result *= 10;
4048
level--;
4149
}
4250
return result;
4351
}
4452

45-
static uint64_t MaxFileSizeForLevel(int level) {
46-
return kTargetFileSize; // We could vary per level to reduce number of files?
53+
static uint64_t MaxFileSizeForLevel(const Options* options, int level) {
54+
// We could vary per level to reduce number of files?
55+
return TargetFileSize(options);
4756
}
4857

4958
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
@@ -508,7 +517,7 @@ int Version::PickLevelForMemTableOutput(
508517
// Check that file does not overlap too many grandparent bytes.
509518
GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
510519
const int64_t sum = TotalFileSize(overlaps);
511-
if (sum > kMaxGrandParentOverlapBytes) {
520+
if (sum > MaxGrandParentOverlapBytes(vset_->options_)) {
512521
break;
513522
}
514523
}
@@ -1027,7 +1036,7 @@ bool VersionSet::ReuseManifest(const std::string& dscname,
10271036
manifest_type != kDescriptorFile ||
10281037
!env_->GetFileSize(dscname, &manifest_size).ok() ||
10291038
// Make new compacted MANIFEST if old one is too big
1030-
manifest_size >= kTargetFileSize) {
1039+
manifest_size >= TargetFileSize(options_)) {
10311040
return false;
10321041
}
10331042

@@ -1076,7 +1085,8 @@ void VersionSet::Finalize(Version* v) {
10761085
} else {
10771086
// Compute the ratio of current size to size limit.
10781087
const uint64_t level_bytes = TotalFileSize(v->files_[level]);
1079-
score = static_cast<double>(level_bytes) / MaxBytesForLevel(level);
1088+
score =
1089+
static_cast<double>(level_bytes) / MaxBytesForLevel(options_, level);
10801090
}
10811091

10821092
if (score > best_score) {
@@ -1290,7 +1300,7 @@ Compaction* VersionSet::PickCompaction() {
12901300
level = current_->compaction_level_;
12911301
assert(level >= 0);
12921302
assert(level+1 < config::kNumLevels);
1293-
c = new Compaction(level);
1303+
c = new Compaction(options_, level);
12941304

12951305
// Pick the first file that comes after compact_pointer_[level]
12961306
for (size_t i = 0; i < current_->files_[level].size(); i++) {
@@ -1307,7 +1317,7 @@ Compaction* VersionSet::PickCompaction() {
13071317
}
13081318
} else if (seek_compaction) {
13091319
level = current_->file_to_compact_level_;
1310-
c = new Compaction(level);
1320+
c = new Compaction(options_, level);
13111321
c->inputs_[0].push_back(current_->file_to_compact_);
13121322
} else {
13131323
return NULL;
@@ -1352,7 +1362,8 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
13521362
const int64_t inputs1_size = TotalFileSize(c->inputs_[1]);
13531363
const int64_t expanded0_size = TotalFileSize(expanded0);
13541364
if (expanded0.size() > c->inputs_[0].size() &&
1355-
inputs1_size + expanded0_size < kExpandedCompactionByteSizeLimit) {
1365+
inputs1_size + expanded0_size <
1366+
ExpandedCompactionByteSizeLimit(options_)) {
13561367
InternalKey new_start, new_limit;
13571368
GetRange(expanded0, &new_start, &new_limit);
13581369
std::vector<FileMetaData*> expanded1;
@@ -1414,7 +1425,7 @@ Compaction* VersionSet::CompactRange(
14141425
// and we must not pick one file and drop another older file if the
14151426
// two files overlap.
14161427
if (level > 0) {
1417-
const uint64_t limit = MaxFileSizeForLevel(level);
1428+
const uint64_t limit = MaxFileSizeForLevel(options_, level);
14181429
uint64_t total = 0;
14191430
for (size_t i = 0; i < inputs.size(); i++) {
14201431
uint64_t s = inputs[i]->file_size;
@@ -1426,17 +1437,17 @@ Compaction* VersionSet::CompactRange(
14261437
}
14271438
}
14281439

1429-
Compaction* c = new Compaction(level);
1440+
Compaction* c = new Compaction(options_, level);
14301441
c->input_version_ = current_;
14311442
c->input_version_->Ref();
14321443
c->inputs_[0] = inputs;
14331444
SetupOtherInputs(c);
14341445
return c;
14351446
}
14361447

1437-
Compaction::Compaction(int level)
1448+
Compaction::Compaction(const Options* options, int level)
14381449
: level_(level),
1439-
max_output_file_size_(MaxFileSizeForLevel(level)),
1450+
max_output_file_size_(MaxFileSizeForLevel(options, level)),
14401451
input_version_(NULL),
14411452
grandparent_index_(0),
14421453
seen_key_(false),
@@ -1453,12 +1464,13 @@ Compaction::~Compaction() {
14531464
}
14541465

14551466
bool Compaction::IsTrivialMove() const {
1467+
const VersionSet* vset = input_version_->vset_;
14561468
// Avoid a move if there is lots of overlapping grandparent data.
14571469
// Otherwise, the move could create a parent file that will require
14581470
// a very expensive merge later on.
1459-
return (num_input_files(0) == 1 &&
1460-
num_input_files(1) == 0 &&
1461-
TotalFileSize(grandparents_) <= kMaxGrandParentOverlapBytes);
1471+
return (num_input_files(0) == 1 && num_input_files(1) == 0 &&
1472+
TotalFileSize(grandparents_) <=
1473+
MaxGrandParentOverlapBytes(vset->options_));
14621474
}
14631475

14641476
void Compaction::AddInputDeletions(VersionEdit* edit) {
@@ -1491,8 +1503,9 @@ bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
14911503
}
14921504

14931505
bool Compaction::ShouldStopBefore(const Slice& internal_key) {
1506+
const VersionSet* vset = input_version_->vset_;
14941507
// Scan to find earliest grandparent file that contains key.
1495-
const InternalKeyComparator* icmp = &input_version_->vset_->icmp_;
1508+
const InternalKeyComparator* icmp = &vset->icmp_;
14961509
while (grandparent_index_ < grandparents_.size() &&
14971510
icmp->Compare(internal_key,
14981511
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
@@ -1503,7 +1516,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) {
15031516
}
15041517
seen_key_ = true;
15051518

1506-
if (overlapped_bytes_ > kMaxGrandParentOverlapBytes) {
1519+
if (overlapped_bytes_ > MaxGrandParentOverlapBytes(vset->options_)) {
15071520
// Too much overlap for current output; start new output
15081521
overlapped_bytes_ = 0;
15091522
return true;

db/version_set.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ class Compaction {
366366
friend class Version;
367367
friend class VersionSet;
368368

369-
explicit Compaction(int level);
369+
Compaction(const Options* options, int level);
370370

371371
int level_;
372372
uint64_t max_output_file_size_;

include/leveldb/options.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,18 @@ struct Options {
112112
// Default: 16
113113
int block_restart_interval;
114114

115+
// Leveldb will write up to this amount of bytes to a file before
116+
// switching to a new one.
117+
// Most clients should leave this parameter alone. However if your
118+
// filesystem is more efficient with larger files, you could
119+
// consider increasing the value. The downside will be longer
120+
// compactions and hence longer latency/performance hiccups.
121+
// Another reason to increase this parameter might be when you are
122+
// initially populating a large database.
123+
//
124+
// Default: 2MB
125+
size_t max_file_size;
126+
115127
// Compress blocks using the specified compression algorithm. This
116128
// parameter can be changed dynamically.
117129
//

util/options.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Options::Options()
2121
block_cache(NULL),
2222
block_size(4096),
2323
block_restart_interval(16),
24+
max_file_size(2<<20),
2425
compression(kSnappyCompression),
2526
reuse_logs(false),
2627
filter_policy(NULL) {

0 commit comments

Comments
 (0)