import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.apache.lucene.index.CorruptIndexException;
import org.opensearch.ExceptionsHelper;
+import org.opensearch.OpenSearchCorruptionException;
import org.opensearch.action.support.ChannelActionListener;
import org.opensearch.cluster.ClusterChangedEvent;
import org.opensearch.cluster.ClusterStateListener;
import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.CancellableThreads;
+import org.opensearch.common.util.concurrent.AbstractRunnable;
import org.opensearch.common.util.concurrent.ConcurrentCollections;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.index.shard.IndexEventListener;
import org.opensearch.index.shard.IndexShard;
import org.opensearch.index.shard.IndexShardState;
+import org.opensearch.index.store.Store;
import org.opensearch.indices.IndicesService;
import org.opensearch.indices.recovery.FileChunkRequest;
import org.opensearch.indices.recovery.ForceSyncRequest;
import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT;

/**
- * Service class that handles incoming checkpoints to initiate replication events on replicas.
+ * Service class that orchestrates replication events on replicas.
 *
 * @opensearch.internal
 */
@@ -68,14 +72,17 @@ public class SegmentReplicationTargetService extends AbstractLifecycleComponent
    private final ThreadPool threadPool;
    private final RecoverySettings recoverySettings;

+    private final ReplicationCollection<SegmentReplicationTarget> onGoingReplications;
+
+    private final Map<ShardId, SegmentReplicationState> completedReplications = ConcurrentCollections.newConcurrentMap();
+
    private final SegmentReplicationSourceFactory sourceFactory;

    protected final Map<ShardId, ReplicationCheckpoint> latestReceivedCheckpoint = ConcurrentCollections.newConcurrentMap();

    private final IndicesService indicesService;
    private final ClusterService clusterService;
    private final TransportService transportService;
-    private final SegmentReplicator replicator;

    /**
     * The internal actions
@@ -87,7 +94,6 @@ public static class Actions {
        public static final String FORCE_SYNC = "internal:index/shard/replication/segments_sync";
    }

-    @Deprecated
    public SegmentReplicationTargetService(
        final ThreadPool threadPool,
        final RecoverySettings recoverySettings,
@@ -107,7 +113,6 @@ public SegmentReplicationTargetService(
        );
    }

-    @Deprecated
    public SegmentReplicationTargetService(
        final ThreadPool threadPool,
        final RecoverySettings recoverySettings,
@@ -116,34 +121,14 @@ public SegmentReplicationTargetService(
        final IndicesService indicesService,
        final ClusterService clusterService,
        final ReplicationCollection<SegmentReplicationTarget> ongoingSegmentReplications
-    ) {
-        this(
-            threadPool,
-            recoverySettings,
-            transportService,
-            sourceFactory,
-            indicesService,
-            clusterService,
-            new SegmentReplicator(threadPool)
-        );
-    }
-
-    public SegmentReplicationTargetService(
-        final ThreadPool threadPool,
-        final RecoverySettings recoverySettings,
-        final TransportService transportService,
-        final SegmentReplicationSourceFactory sourceFactory,
-        final IndicesService indicesService,
-        final ClusterService clusterService,
-        final SegmentReplicator replicator
    ) {
        this.threadPool = threadPool;
        this.recoverySettings = recoverySettings;
+        this.onGoingReplications = ongoingSegmentReplications;
        this.sourceFactory = sourceFactory;
        this.indicesService = indicesService;
        this.clusterService = clusterService;
        this.transportService = transportService;
-        this.replicator = replicator;

        transportService.registerRequestHandler(
            Actions.FILE_CHUNK,
@@ -169,7 +154,7 @@ protected void doStart() {
    @Override
    protected void doStop() {
        if (DiscoveryNode.isDataNode(clusterService.getSettings())) {
-            assert replicator.size() == 0 : "Replication collection should be empty on shutdown";
+            assert onGoingReplications.size() == 0 : "Replication collection should be empty on shutdown";
            clusterService.removeListener(this);
        }
    }
@@ -214,7 +199,7 @@ public void clusterChanged(ClusterChangedEvent event) {
    @Override
    public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) {
        if (indexShard != null && indexShard.indexSettings().isSegRepEnabledOrRemoteNode()) {
-            replicator.cancel(indexShard.shardId(), "Shard closing");
+            onGoingReplications.cancelForShard(indexShard.shardId(), "Shard closing");
            latestReceivedCheckpoint.remove(shardId);
        }
    }
@@ -239,7 +224,7 @@ public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting ol
            && indexShard.indexSettings().isSegRepEnabledOrRemoteNode()
            && oldRouting.primary() == false
            && newRouting.primary()) {
-            replicator.cancel(indexShard.shardId(), "Shard has been promoted to primary");
+            onGoingReplications.cancelForShard(indexShard.shardId(), "Shard has been promoted to primary");
            latestReceivedCheckpoint.remove(indexShard.shardId());
        }
    }
@@ -249,15 +234,17 @@ public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting ol
     */
    @Nullable
    public SegmentReplicationState getOngoingEventSegmentReplicationState(ShardId shardId) {
-        return Optional.ofNullable(replicator.get(shardId)).map(SegmentReplicationTarget::state).orElse(null);
+        return Optional.ofNullable(onGoingReplications.getOngoingReplicationTarget(shardId))
+            .map(SegmentReplicationTarget::state)
+            .orElse(null);
    }

    /**
     * returns SegmentReplicationState of latest completed segment replication events.
     */
    @Nullable
    public SegmentReplicationState getlatestCompletedEventSegmentReplicationState(ShardId shardId) {
-        return replicator.getCompleted(shardId);
+        return completedReplications.get(shardId);
    }

    /**
@@ -270,11 +257,11 @@ public SegmentReplicationState getSegmentReplicationState(ShardId shardId) {
    }

    public ReplicationRef<SegmentReplicationTarget> get(long replicationId) {
-        return replicator.get(replicationId);
+        return onGoingReplications.get(replicationId);
    }

    public SegmentReplicationTarget get(ShardId shardId) {
-        return replicator.get(shardId);
+        return onGoingReplications.getOngoingReplicationTarget(shardId);
    }

    /**
@@ -298,7 +285,7 @@ public synchronized void onNewCheckpoint(final ReplicationCheckpoint receivedChe
        // checkpoint to be replayed once the shard is Active.
        if (replicaShard.state().equals(IndexShardState.STARTED) == true) {
            // Checks if received checkpoint is already present and ahead then it replaces old received checkpoint
-            SegmentReplicationTarget ongoingReplicationTarget = replicator.get(replicaShard.shardId());
+            SegmentReplicationTarget ongoingReplicationTarget = onGoingReplications.getOngoingReplicationTarget(replicaShard.shardId());
            if (ongoingReplicationTarget != null) {
                if (ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() < receivedCheckpoint.getPrimaryTerm()) {
                    logger.debug(
@@ -517,12 +504,28 @@ public SegmentReplicationTarget startReplication(
        final ReplicationCheckpoint checkpoint,
        final SegmentReplicationListener listener
    ) {
-        return replicator.startReplication(indexShard, checkpoint, sourceFactory.get(indexShard), listener);
+        final SegmentReplicationTarget target = new SegmentReplicationTarget(
+            indexShard,
+            checkpoint,
+            sourceFactory.get(indexShard),
+            listener
+        );
+        startReplication(target);
+        return target;
    }

    // pkg-private for integration tests
    void startReplication(final SegmentReplicationTarget target) {
-        replicator.startReplication(target, recoverySettings.activityTimeout());
+        final long replicationId;
+        try {
+            replicationId = onGoingReplications.startSafe(target, recoverySettings.activityTimeout());
+        } catch (ReplicationFailedException e) {
+            // replication already running for shard.
+            target.fail(e, false);
+            return;
+        }
+        logger.trace(() -> new ParameterizedMessage("Added new replication to collection {}", target.description()));
+        threadPool.generic().execute(new ReplicationRunner(replicationId));
    }

    /**
@@ -547,14 +550,89 @@ default void onFailure(ReplicationState state, ReplicationFailedException e, boo
        void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure);
    }

+    /**
+     * Runnable implementation to trigger a replication event.
+     */
+    private class ReplicationRunner extends AbstractRunnable {
+
+        final long replicationId;
+
+        public ReplicationRunner(long replicationId) {
+            this.replicationId = replicationId;
+        }
+
+        @Override
+        public void onFailure(Exception e) {
+            onGoingReplications.fail(replicationId, new ReplicationFailedException("Unexpected Error during replication", e), false);
+        }
+
+        @Override
+        public void doRun() {
+            start(replicationId);
+        }
+    }
+
+    private void start(final long replicationId) {
+        final SegmentReplicationTarget target;
+        try (ReplicationRef<SegmentReplicationTarget> replicationRef = onGoingReplications.get(replicationId)) {
+            // This check is for handling edge cases where the reference is removed before the ReplicationRunner is started by the
+            // threadpool.
+            if (replicationRef == null) {
+                return;
+            }
+            target = replicationRef.get();
+        }
+        target.startReplication(new ActionListener<>() {
+            @Override
+            public void onResponse(Void o) {
+                logger.debug(() -> new ParameterizedMessage("Finished replicating {} marking as done.", target.description()));
+                onGoingReplications.markAsDone(replicationId);
+                if (target.state().getIndex().recoveredFileCount() != 0 && target.state().getIndex().recoveredBytes() != 0) {
+                    completedReplications.put(target.shardId(), target.state());
+                }
+            }
+
+            @Override
+            public void onFailure(Exception e) {
+                logger.debug("Replication failed {}", target.description());
+                if (isStoreCorrupt(target) || e instanceof CorruptIndexException || e instanceof OpenSearchCorruptionException) {
+                    onGoingReplications.fail(replicationId, new ReplicationFailedException("Store corruption during replication", e), true);
+                    return;
+                }
+                onGoingReplications.fail(replicationId, new ReplicationFailedException("Segment Replication failed", e), false);
+            }
+        });
+    }
+
+    private boolean isStoreCorrupt(SegmentReplicationTarget target) {
+        // ensure target is not already closed. In that case
+        // we can assume the store is not corrupt and that the replication
+        // event completed successfully.
+        if (target.refCount() > 0) {
+            final Store store = target.store();
+            if (store.tryIncRef()) {
+                try {
+                    return store.isMarkedCorrupted();
+                } catch (IOException ex) {
+                    logger.warn("Unable to determine if store is corrupt", ex);
+                    return false;
+                } finally {
+                    store.decRef();
+                }
+            }
+        }
+        // store already closed.
+        return false;
+    }
+
    private class FileChunkTransportRequestHandler implements TransportRequestHandler<FileChunkRequest> {

        // How many bytes we've copied since we last called RateLimiter.pause
        final AtomicLong bytesSinceLastPause = new AtomicLong();

        @Override
        public void messageReceived(final FileChunkRequest request, TransportChannel channel, Task task) throws Exception {
-            try (ReplicationRef<SegmentReplicationTarget> ref = replicator.get(request.recoveryId(), request.shardId())) {
+            try (ReplicationRef<SegmentReplicationTarget> ref = onGoingReplications.getSafe(request.recoveryId(), request.shardId())) {
                final SegmentReplicationTarget target = ref.get();
                final ActionListener<Void> listener = target.createOrFinishListener(channel, Actions.FILE_CHUNK, request);
                target.handleFileChunk(request, target, bytesSinceLastPause, recoverySettings.replicationRateLimiter(), listener);
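
For context, a minimal caller-side sketch of the public API touched above: it starts a replication event and observes the outcome through the SegmentReplicationListener contract. This is illustrative only and not part of the change; the variables `service`, `replicaShard`, and `checkpoint` are assumed to be in scope.

// Hypothetical usage sketch; variable names are assumptions, not code from this diff.
final SegmentReplicationTarget target = service.startReplication(
    replicaShard,
    checkpoint,
    new SegmentReplicationTargetService.SegmentReplicationListener() {
        @Override
        public void onReplicationDone(SegmentReplicationState state) {
            // Replica has caught up to the received checkpoint; state carries the replication metrics.
        }

        @Override
        public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) {
            // Failure path; sendShardFailure indicates whether the shard should be failed locally.
        }
    }
);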