Skip to content

Commit b8fb112

Browse files
committed
Correct how many log bytes are written during restore
20250326-182125-jzhou-375d243c097c3b5a 20250325-221139-jzhou-5dac71c4525d414c 100k backup tests: 20250326-162525-jzhou-f08e3fc12887a3e9
1 parent 0b3a0e0 commit b8fb112

File tree

1 file changed

+34
-10
lines changed

1 file changed

+34
-10
lines changed

fdbclient/FileBackupAgent.actor.cpp

+34-10
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "flow/Trace.h"
5353

5454
#include <cinttypes>
55+
#include <cstdint>
5556
#include <ctime>
5657
#include <climits>
5758
#include "flow/IAsyncFile.h"
@@ -5084,6 +5085,7 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
50845085
// int64_t task parameter accessor. Returns __FUNCTION__sr, so the parameter is presumably keyed by
// this function's name ("maxTagID") — TODO confirm against the TaskParam definition.
static TaskParam<int64_t> maxTagID() { return __FUNCTION__sr; }
50855086
// Version task parameter accessor; the value is stored on the task with
// Params.beginVersion().set(task, begin) before the task is added to the task bucket.
static TaskParam<Version> beginVersion() { return __FUNCTION__sr; }
50865087
// Version task parameter accessor; the value is stored on the task with
// Params.endVersion().set(task, end) before the task is added to the task bucket.
static TaskParam<Version> endVersion() { return __FUNCTION__sr; }
5088+
// Running count of bytes this task has committed. Set to 0 when the task is created, increased by
// txBytes after each successful commit in writeMutations(), and read when the task finishes to update
// the restore's bytesWritten and block counters.
static TaskParam<int64_t> bytesWritten() { return __FUNCTION__sr; }
50875089
// Task parameter holding the list of RestoreConfig::RestoreFile entries for this task; stored with
// Params.logs().set(task, logs) before the task is added to the task bucket.
static TaskParam<std::vector<RestoreConfig::RestoreFile>> logs() { return __FUNCTION__sr; }
50885090
} Params;
50895091

@@ -5173,7 +5175,8 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
51735175
// Writes backup mutations to the database
51745176
ACTOR static Future<Void> writeMutations(Database cx,
51755177
std::vector<Standalone<VectorRef<KeyValueRef>>> mutations,
5176-
Key mutationLogPrefix) {
5178+
Key mutationLogPrefix,
5179+
Reference<Task> task) {
51775180
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
51785181
state Standalone<VectorRef<KeyValueRef>> oldFormatMutations;
51795182
state int mutationIndex = 0;
@@ -5210,6 +5213,14 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
52105213
++mutationCount;
52115214
}
52125215
wait(tr->commit());
5216+
int64_t oldBytes = Params.bytesWritten().get(task);
5217+
Params.bytesWritten().set(task, oldBytes + txBytes);
5218+
DisabledTraceEvent("FileRestorePartitionedLogCommittData")
5219+
.detail("TaskInstance", THIS_ADDR)
5220+
.detail("MutationIndex", mutationIndex)
5221+
.detail("MutationCount", mutationCount)
5222+
.detail("TotalMutation", totalMutation)
5223+
.detail("Bytes", txBytes);
52135224
mutationIndex += mutationCount; // update mutationIndex after the commit succeeds
52145225
} catch (Error& e) {
52155226
if (e.code() == error_code_transaction_too_large) {
@@ -5291,7 +5302,7 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
52915302
makeReference<PartitionedLogIteratorTwoBuffers>(bc, k, filesByTag[k], fileEndVersionByTag[k]);
52925303
}
52935304

5294-
DisabledTraceEvent("RestoredPartitionedLogDataExeStart")
5305+
DisabledTraceEvent("FileRestorePartitionedLogDataExeStart")
52955306
.detail("BeginVersion", begin)
52965307
.detail("EndVersion", end)
52975308
.detail("Files", logs.size())
@@ -5312,7 +5323,7 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
53125323
// batching mutations from multiple versions together before writing to the database
53135324
state int64_t bytes = oneVersionData.expectedSize();
53145325
if (totalBytes + bytes > CLIENT_KNOBS->RESTORE_WRITE_TX_SIZE) {
5315-
wait(writeMutations(cx, mutations, restore.mutationLogPrefix()));
5326+
wait(writeMutations(cx, mutations, restore.mutationLogPrefix(), task));
53165327
mutations.clear();
53175328
totalBytes = 0;
53185329
}
@@ -5321,15 +5332,15 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
53215332
} catch (Error& e) {
53225333
if (e.code() == error_code_end_of_stream) {
53235334
if (mutations.size() > 0) {
5324-
wait(writeMutations(cx, mutations, restore.mutationLogPrefix()));
5335+
wait(writeMutations(cx, mutations, restore.mutationLogPrefix(), task));
53255336
}
53265337
break;
53275338
} else {
53285339
throw;
53295340
}
53305341
}
53315342
}
5332-
DisabledTraceEvent("RestoredPartitionedLogDataExeDone")
5343+
DisabledTraceEvent("FileRestorePartitionedLogDataExeDone")
53335344
.detail("BeginVersion", begin)
53345345
.detail("EndVersion", end)
53355346
.detail("Files", logs.size())
@@ -5342,7 +5353,19 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
53425353
Reference<TaskBucket> taskBucket,
53435354
Reference<FutureBucket> futureBucket,
53445355
Reference<Task> task) {
5345-
RestoreConfig(task).fileBlocksFinished().atomicOp(tr, 1, MutationRef::Type::AddValue);
5356+
state int64_t logBytesWritten = Params.bytesWritten().get(task);
5357+
RestoreConfig(task).bytesWritten().atomicOp(tr, logBytesWritten, MutationRef::Type::AddValue);
5358+
5359+
int64_t blocks =
5360+
(logBytesWritten + CLIENT_KNOBS->BACKUP_LOGFILE_BLOCK_SIZE - 1) / CLIENT_KNOBS->BACKUP_LOGFILE_BLOCK_SIZE;
5361+
// The number of blocks is not known at dispatch time, so the dispatched-block count has to be updated here
5362+
RestoreConfig(task).filesBlocksDispatched().atomicOp(tr, blocks, MutationRef::Type::AddValue);
5363+
RestoreConfig(task).fileBlocksFinished().atomicOp(tr, blocks, MutationRef::Type::AddValue);
5364+
5365+
DisabledTraceEvent("FileRestorePartitionedLogCommittedData")
5366+
.detail("TaskInstance", THIS_ADDR)
5367+
.detail("Blocks", blocks)
5368+
.detail("LogBytes", logBytesWritten);
53465369

53475370
state Reference<TaskFuture> taskFuture = futureBucket->unpack(task->params[Task::reservedTaskParamKeyDone]);
53485371
wait(taskFuture->set(tr, taskBucket) && taskBucket->finish(tr, task));
@@ -5370,6 +5393,7 @@ struct RestoreLogDataPartitionedTaskFunc : RestoreFileTaskFuncBase {
53705393
Params.beginVersion().set(task, begin);
53715394
Params.endVersion().set(task, end);
53725395
Params.logs().set(task, logs);
5396+
Params.bytesWritten().set(task, 0);
53735397

53745398
if (!waitFor) {
53755399
return taskBucket->addTask(tr, task);
@@ -5450,7 +5474,7 @@ struct RestoreDispatchPartitionedTaskFunc : RestoreTaskFuncBase {
54505474
wait(success(RestoreDispatchPartitionedTaskFunc::addTask(
54515475
tr, taskBucket, task, firstVersion, beginVersion, endVersion)));
54525476

5453-
TraceEvent("RestorePartitionDispatch")
5477+
TraceEvent("FileRestorePartitionDispatch")
54545478
.detail("RestoreUID", restore.getUid())
54555479
.detail("BeginVersion", beginVersion)
54565480
.detail("ApplyLag", applyLag)
@@ -5519,7 +5543,7 @@ struct RestoreDispatchPartitionedTaskFunc : RestoreTaskFuncBase {
55195543
// If apply lag is 0 then we are done so create the completion task
55205544
wait(success(RestoreCompleteTaskFunc::addTask(tr, taskBucket, task, TaskCompletionKey::noSignal())));
55215545

5522-
TraceEvent("RestorePartitionDispatch")
5546+
TraceEvent("FileRestorePartitionDispatch")
55235547
.detail("RestoreUID", restore.getUid())
55245548
.detail("BeginVersion", beginVersion)
55255549
.detail("ApplyLag", applyLag)
@@ -5535,7 +5559,7 @@ struct RestoreDispatchPartitionedTaskFunc : RestoreTaskFuncBase {
55355559
wait(success(RestoreDispatchPartitionedTaskFunc::addTask(
55365560
tr, taskBucket, task, firstVersion, beginVersion, endVersion)));
55375561

5538-
TraceEvent("RestorePartitionDispatch")
5562+
TraceEvent("FileRestorePartitionDispatch")
55395563
.detail("RestoreUID", restore.getUid())
55405564
.detail("BeginVersion", beginVersion)
55415565
.detail("ApplyLag", applyLag)
@@ -5589,7 +5613,7 @@ struct RestoreDispatchPartitionedTaskFunc : RestoreTaskFuncBase {
55895613
wait(waitForAll(addTaskFutures));
55905614
wait(taskBucket->finish(tr, task));
55915615

5592-
TraceEvent("RestorePartitionDispatch")
5616+
TraceEvent("FileRestorePartitionDispatch")
55935617
.detail("RestoreUID", restore.getUid())
55945618
.detail("BeginVersion", beginVersion)
55955619
.detail("EndVersion", endVersion)

0 commit comments

Comments
 (0)