Skip to content

Commit ef47252

Browse files
authored
Add support to skip pinned timestamp in remote translog garbage collector (#15416)
Signed-off-by: Sachin Kale <[email protected]>
1 parent 4c98c7e commit ef47252

File tree

9 files changed

+1441
-94
lines changed

9 files changed

+1441
-94
lines changed

server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.opensearch.index.seqno.SeqNoStats;
6767
import org.opensearch.index.shard.IndexShard;
6868
import org.opensearch.indices.IndicesService;
69+
import org.opensearch.indices.RemoteStoreSettings;
6970
import org.opensearch.indices.replication.common.ReplicationType;
7071
import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase;
7172
import org.opensearch.test.OpenSearchIntegTestCase;
@@ -109,13 +110,16 @@ public void cleanUp() throws Exception {
109110
assertAcked(
110111
client().admin().indices().prepareDelete("*").setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_CLOSED_HIDDEN).get()
111112
);
112-
assertBusy(() -> {
113-
try {
114-
assertEquals(0, getFileCount(translogRepoPath));
115-
} catch (IOException e) {
116-
fail();
117-
}
118-
}, 30, TimeUnit.SECONDS);
113+
// With pinned timestamp, we can have tlog files even after deletion.
114+
if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) {
115+
assertBusy(() -> {
116+
try {
117+
assertEquals(0, getFileCount(translogRepoPath));
118+
} catch (IOException e) {
119+
fail();
120+
}
121+
}, 30, TimeUnit.SECONDS);
122+
}
119123
super.teardown();
120124
}
121125

server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java

Lines changed: 333 additions & 36 deletions
Large diffs are not rendered by default.

server/src/main/java/org/opensearch/index/translog/Translog.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,10 @@ TranslogReader openReader(Path path, Checkpoint checkpoint) throws IOException {
317317
*/
318318
public static long parseIdFromFileName(Path translogFile) {
319319
final String fileName = translogFile.getFileName().toString();
320+
return parseIdFromFileName(fileName);
321+
}
322+
323+
public static long parseIdFromFileName(String fileName) {
320324
final Matcher matcher = PARSE_STRICT_ID_PATTERN.matcher(fileName);
321325
if (matcher.matches()) {
322326
try {

server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@
4545
import java.util.Set;
4646
import java.util.concurrent.CountDownLatch;
4747
import java.util.concurrent.TimeUnit;
48+
import java.util.function.Function;
4849
import java.util.stream.Collectors;
4950

5051
import static org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot;
5152
import static org.opensearch.index.translog.transfer.FileSnapshot.TranslogFileSnapshot;
53+
import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR;
5254

5355
/**
5456
* The class responsible for orchestrating the transfer of a {@link TransferSnapshot} via a {@link TransferService}
@@ -337,35 +339,54 @@ private void deleteFileIfExists(Path filePath) throws IOException {
337339
}
338340
}
339341

342+
public TranslogTransferMetadata readMetadata(long pinnedTimestamp) throws IOException {
343+
if (pinnedTimestamp <= 0) {
344+
return readMetadata();
345+
}
346+
return readMetadata((blobMetadataList) -> {
347+
List<String> metadataFiles = blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList());
348+
Set<String> metadataFilesMatchingTimestamp = RemoteStoreUtils.getPinnedTimestampLockedFiles(
349+
metadataFiles,
350+
Set.of(pinnedTimestamp),
351+
file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[3]),
352+
TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen,
353+
true
354+
);
355+
if (metadataFilesMatchingTimestamp.isEmpty()) {
356+
return null;
357+
}
358+
assert metadataFilesMatchingTimestamp.size() == 1 : "There should be only 1 metadata file matching given timestamp";
359+
return metadataFilesMatchingTimestamp.stream().findFirst().get();
360+
}, Integer.MAX_VALUE);
361+
}
362+
340363
public TranslogTransferMetadata readMetadata() throws IOException {
364+
return readMetadata((blobMetadataList) -> {
365+
RemoteStoreUtils.verifyNoMultipleWriters(
366+
blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList()),
367+
TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen
368+
);
369+
return blobMetadataList.get(0).name();
370+
}, METADATA_FILES_TO_FETCH);
371+
}
372+
373+
private TranslogTransferMetadata readMetadata(Function<List<BlobMetadata>, String> getMetadataFileToRead, int numberOfFilesToFetch)
374+
throws IOException {
341375
SetOnce<TranslogTransferMetadata> metadataSetOnce = new SetOnce<>();
342376
SetOnce<IOException> exceptionSetOnce = new SetOnce<>();
343377
final CountDownLatch latch = new CountDownLatch(1);
344378
LatchedActionListener<List<BlobMetadata>> latchedActionListener = new LatchedActionListener<>(
345379
ActionListener.wrap(blobMetadataList -> {
346380
if (blobMetadataList.isEmpty()) return;
347-
RemoteStoreUtils.verifyNoMultipleWriters(
348-
blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList()),
349-
TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen
350-
);
351-
String filename = blobMetadataList.get(0).name();
352-
boolean downloadStatus = false;
353-
long downloadStartTime = System.nanoTime(), bytesToRead = 0;
354-
try (InputStream inputStream = transferService.downloadBlob(remoteMetadataTransferPath, filename)) {
355-
// Capture number of bytes for stats before reading
356-
bytesToRead = inputStream.available();
357-
IndexInput indexInput = new ByteArrayIndexInput("metadata file", inputStream.readAllBytes());
358-
metadataSetOnce.set(metadataStreamWrapper.readStream(indexInput));
359-
downloadStatus = true;
381+
String filename = getMetadataFileToRead.apply(blobMetadataList);
382+
if (filename == null) {
383+
return;
384+
}
385+
try {
386+
metadataSetOnce.set(readMetadata(filename));
360387
} catch (IOException e) {
361388
logger.error(() -> new ParameterizedMessage("Exception while reading metadata file: {}", filename), e);
362389
exceptionSetOnce.set(e);
363-
} finally {
364-
remoteTranslogTransferTracker.addDownloadTimeInMillis((System.nanoTime() - downloadStartTime) / 1_000_000L);
365-
logger.debug("translogMetadataDownloadStatus={}", downloadStatus);
366-
if (downloadStatus) {
367-
remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead);
368-
}
369390
}
370391
}, e -> {
371392
if (e instanceof RuntimeException) {
@@ -381,12 +402,14 @@ public TranslogTransferMetadata readMetadata() throws IOException {
381402
transferService.listAllInSortedOrder(
382403
remoteMetadataTransferPath,
383404
TranslogTransferMetadata.METADATA_PREFIX,
384-
METADATA_FILES_TO_FETCH,
405+
numberOfFilesToFetch,
385406
latchedActionListener
386407
);
387-
latch.await();
408+
if (latch.await(remoteStoreSettings.getClusterRemoteTranslogTransferTimeout().millis(), TimeUnit.MILLISECONDS) == false) {
409+
throw new RuntimeException("Timed out reading metadata file");
410+
}
388411
} catch (InterruptedException e) {
389-
throw new IOException("Exception while reading/downloading metadafile", e);
412+
throw new IOException("Exception while reading/downloading metadata file", e);
390413
}
391414

392415
if (exceptionSetOnce.get() != null) {
@@ -396,6 +419,26 @@ public TranslogTransferMetadata readMetadata() throws IOException {
396419
return metadataSetOnce.get();
397420
}
398421

422+
public TranslogTransferMetadata readMetadata(String metadataFilename) throws IOException {
423+
boolean downloadStatus = false;
424+
TranslogTransferMetadata translogTransferMetadata = null;
425+
long downloadStartTime = System.nanoTime(), bytesToRead = 0;
426+
try (InputStream inputStream = transferService.downloadBlob(remoteMetadataTransferPath, metadataFilename)) {
427+
// Capture number of bytes for stats before reading
428+
bytesToRead = inputStream.available();
429+
IndexInput indexInput = new ByteArrayIndexInput("metadata file", inputStream.readAllBytes());
430+
translogTransferMetadata = metadataStreamWrapper.readStream(indexInput);
431+
downloadStatus = true;
432+
} finally {
433+
remoteTranslogTransferTracker.addDownloadTimeInMillis((System.nanoTime() - downloadStartTime) / 1_000_000L);
434+
logger.debug("translogMetadataDownloadStatus={}", downloadStatus);
435+
if (downloadStatus) {
436+
remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead);
437+
}
438+
}
439+
return translogTransferMetadata;
440+
}
441+
399442
private TransferFileSnapshot prepareMetadata(TransferSnapshot transferSnapshot) throws IOException {
400443
Map<String, String> generationPrimaryTermMap = transferSnapshot.getTranslogFileSnapshots().stream().map(s -> {
401444
assert s instanceof TranslogFileSnapshot;
@@ -549,6 +592,16 @@ public void onFailure(Exception e) {
549592
});
550593
}
551594

595+
public void listTranslogMetadataFilesAsync(ActionListener<List<BlobMetadata>> listener) {
596+
transferService.listAllInSortedOrderAsync(
597+
ThreadPool.Names.REMOTE_PURGE,
598+
remoteMetadataTransferPath,
599+
TranslogTransferMetadata.METADATA_PREFIX,
600+
Integer.MAX_VALUE,
601+
listener
602+
);
603+
}
604+
552605
public void deleteStaleTranslogMetadataFilesAsync(Runnable onCompletion) {
553606
try {
554607
transferService.listAllInSortedOrderAsync(
@@ -635,7 +688,7 @@ public void onFailure(Exception e) {
635688
* @param files list of metadata files to be deleted.
636689
* @param onCompletion runnable to run on completion of deletion regardless of success/failure.
637690
*/
638-
private void deleteMetadataFilesAsync(List<String> files, Runnable onCompletion) {
691+
public void deleteMetadataFilesAsync(List<String> files, Runnable onCompletion) {
639692
try {
640693
transferService.deleteBlobsAsync(ThreadPool.Names.REMOTE_PURGE, remoteMetadataTransferPath, files, new ActionListener<>() {
641694
@Override

server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
package org.opensearch.index.translog.transfer;
1010

11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.apache.logging.log4j.message.ParameterizedMessage;
14+
import org.opensearch.Version;
1115
import org.opensearch.common.SetOnce;
1216
import org.opensearch.common.collect.Tuple;
1317
import org.opensearch.index.remote.RemoteStoreUtils;
@@ -25,6 +29,8 @@
2529
*/
2630
public class TranslogTransferMetadata {
2731

32+
public static final Logger logger = LogManager.getLogger(TranslogTransferMetadata.class);
33+
2834
private final long primaryTerm;
2935

3036
private final long generation;
@@ -128,6 +134,24 @@ public static Tuple<String, String> getNodeIdByPrimaryTermAndGen(String filename
128134
return new Tuple<>(primaryTermAndGen, nodeId);
129135
}
130136

137+
public static Tuple<Long, Long> getMinMaxTranslogGenerationFromFilename(String filename) {
138+
String[] tokens = filename.split(METADATA_SEPARATOR);
139+
if (tokens.length < 7) {
140+
// For versions < 2.17, we don't have min translog generation.
141+
return null;
142+
}
143+
assert Version.CURRENT.onOrAfter(Version.V_2_17_0);
144+
try {
145+
// instead of direct index, we go backwards to avoid running into same separator in nodeId
146+
String minGeneration = tokens[tokens.length - 2];
147+
String maxGeneration = tokens[2];
148+
return new Tuple<>(RemoteStoreUtils.invertLong(minGeneration), RemoteStoreUtils.invertLong(maxGeneration));
149+
} catch (NumberFormatException e) {
150+
logger.error(() -> new ParameterizedMessage("Exception while getting min and max translog generation from: {}", filename), e);
151+
return null;
152+
}
153+
}
154+
131155
@Override
132156
public int hashCode() {
133157
return Objects.hash(primaryTerm, generation);

server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,11 @@ public static TimeValue getPinnedTimestampsLookbackInterval() {
292292
return pinnedTimestampsLookbackInterval;
293293
}
294294

295+
// Visible for testing
296+
public static void setPinnedTimestampsLookbackInterval(TimeValue pinnedTimestampsLookbackInterval) {
297+
RemoteStoreSettings.pinnedTimestampsLookbackInterval = pinnedTimestampsLookbackInterval;
298+
}
299+
295300
public static boolean isPinnedTimestampsEnabled() {
296301
return isPinnedTimestampsEnabled;
297302
}

0 commit comments

Comments
 (0)