Skip to content

Commit a019322

Browse files
remote publication checksum stats (#15957) (#15960)
* Remote publication checksum stats

Signed-off-by: Himshikha Gupta <[email protected]>
1 parent 14fc1c5 commit a019322

File tree

5 files changed

+91
-4
lines changed

5 files changed

+91
-4
lines changed

server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
4343
import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY;
4444
import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER;
45+
import static org.opensearch.gateway.remote.RemoteDownloadStats.CHECKSUM_VALIDATION_FAILED_COUNT;
4546
import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS;
4647
import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA;
4748
import static org.opensearch.gateway.remote.model.RemoteCustomMetadata.CUSTOM_METADATA;
@@ -228,10 +229,28 @@ private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse)
228229
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 0);
229230
assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount());
230231
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0);
232+
assertEquals(
233+
0,
234+
dataNodeDiscoveryStats.getClusterStateStats()
235+
.getPersistenceStats()
236+
.get(0)
237+
.getExtendedFields()
238+
.get(CHECKSUM_VALIDATION_FAILED_COUNT)
239+
.get()
240+
);
231241

232242
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getSuccessCount() > 0);
233243
assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getFailedCount());
234244
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getTotalTimeInMillis() > 0);
245+
assertEquals(
246+
0,
247+
dataNodeDiscoveryStats.getClusterStateStats()
248+
.getPersistenceStats()
249+
.get(1)
250+
.getExtendedFields()
251+
.get(CHECKSUM_VALIDATION_FAILED_COUNT)
252+
.get()
253+
);
235254
}
236255

237256
private Map<String, Integer> getMetadataFiles(BlobStoreRepository repository, String subDirectory) throws IOException {

server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1619,6 +1619,12 @@ void validateClusterStateFromChecksum(
16191619
failedValidation
16201620
)
16211621
);
1622+
if (isFullStateDownload) {
1623+
remoteStateStats.stateFullDownloadValidationFailed();
1624+
} else {
1625+
remoteStateStats.stateDiffDownloadValidationFailed();
1626+
}
1627+
16221628
if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) {
16231629
throw new IllegalStateException(
16241630
"Cluster state checksums do not match during full state read. Validation failed for " + failedValidation
server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.gateway.remote;
10+
11+
import org.opensearch.cluster.coordination.PersistedStateStats;
12+
13+
import java.util.concurrent.atomic.AtomicLong;
14+
15+
/**
16+
* Download stats for remote state
17+
*
18+
* @opensearch.internal
19+
*/
20+
public class RemoteDownloadStats extends PersistedStateStats {
21+
static final String CHECKSUM_VALIDATION_FAILED_COUNT = "checksum_validation_failed_count";
22+
private AtomicLong checksumValidationFailedCount = new AtomicLong(0);
23+
24+
public RemoteDownloadStats(String statsName) {
25+
super(statsName);
26+
addToExtendedFields(CHECKSUM_VALIDATION_FAILED_COUNT, checksumValidationFailedCount);
27+
}
28+
29+
public void checksumValidationFailedCount() {
30+
checksumValidationFailedCount.incrementAndGet();
31+
}
32+
33+
public long getChecksumValidationFailedCount() {
34+
return checksumValidationFailedCount.get();
35+
}
36+
}

server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,16 @@
1818
public class RemotePersistenceStats {
1919

2020
RemoteUploadStats remoteUploadStats;
21-
PersistedStateStats remoteDiffDownloadStats;
22-
PersistedStateStats remoteFullDownloadStats;
21+
RemoteDownloadStats remoteDiffDownloadStats;
22+
RemoteDownloadStats remoteFullDownloadStats;
2323

2424
final String FULL_DOWNLOAD_STATS = "remote_full_download";
2525
final String DIFF_DOWNLOAD_STATS = "remote_diff_download";
2626

2727
public RemotePersistenceStats() {
2828
remoteUploadStats = new RemoteUploadStats();
29-
remoteDiffDownloadStats = new PersistedStateStats(DIFF_DOWNLOAD_STATS);
30-
remoteFullDownloadStats = new PersistedStateStats(FULL_DOWNLOAD_STATS);
29+
remoteDiffDownloadStats = new RemoteDownloadStats(DIFF_DOWNLOAD_STATS);
30+
remoteFullDownloadStats = new RemoteDownloadStats(FULL_DOWNLOAD_STATS);
3131
}
3232

3333
public void cleanUpAttemptFailed() {
@@ -90,6 +90,22 @@ public void stateDiffDownloadFailed() {
9090
remoteDiffDownloadStats.stateFailed();
9191
}
9292

93+
public void stateDiffDownloadValidationFailed() {
94+
remoteDiffDownloadStats.checksumValidationFailedCount();
95+
}
96+
97+
public void stateFullDownloadValidationFailed() {
98+
remoteFullDownloadStats.checksumValidationFailedCount();
99+
}
100+
101+
public long getStateDiffDownloadValidationFailed() {
102+
return remoteDiffDownloadStats.getChecksumValidationFailedCount();
103+
}
104+
105+
public long getStateFullDownloadValidationFailed() {
106+
return remoteFullDownloadStats.getChecksumValidationFailedCount();
107+
}
108+
93109
public PersistedStateStats getUploadStats() {
94110
return remoteUploadStats;
95111
}

server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3232,6 +3232,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullC
32323232
anyString(),
32333233
anyBoolean()
32343234
);
3235+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
32353236
}
32363237

32373238
public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException {
@@ -3264,6 +3265,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws
32643265
);
32653266
mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true);
32663267
verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true);
3268+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
32673269
}
32683270

32693271
public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException {
@@ -3296,6 +3298,7 @@ public void testGetClusterStateForManifestWithChecksumValidationModeNone() throw
32963298
);
32973299
mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true);
32983300
verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), anyBoolean());
3301+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
32993302
}
33003303

33013304
public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException {
@@ -3338,6 +3341,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma
33383341
NODE_ID,
33393342
true
33403343
);
3344+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
33413345
}
33423346

33433347
public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException {
@@ -3384,6 +3388,7 @@ public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatc
33843388
NODE_ID,
33853389
true
33863390
);
3391+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
33873392
}
33883393

33893394
public void testGetClusterStateUsingDiffWithChecksum() throws IOException {
@@ -3425,6 +3430,7 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException {
34253430
eq(NODE_ID),
34263431
eq(false)
34273432
);
3433+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
34283434
}
34293435

34303436
public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException {
@@ -3466,6 +3472,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOExceptio
34663472
eq(NODE_ID),
34673473
eq(false)
34683474
);
3475+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
34693476
}
34703477

34713478
public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException {
@@ -3506,6 +3513,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws I
35063513
eq(NODE_ID),
35073514
eq(false)
35083515
);
3516+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
35093517
}
35103518

35113519
public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException {
@@ -3567,6 +3575,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws I
35673575
eq(NODE_ID),
35683576
eq(false)
35693577
);
3578+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
35703579
}
35713580

35723581
public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException {
@@ -3628,6 +3637,7 @@ public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOExceptio
36283637
eq(NODE_ID),
36293638
eq(false)
36303639
);
3640+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
36313641
}
36323642

36333643
private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {

0 commit comments

Comments
 (0)