|
20 | 20 |
|
21 | 21 | import static org.mockito.Mockito.any;
|
22 | 22 | import static org.mockito.Mockito.doAnswer;
|
| 23 | +import static org.mockito.Mockito.mock; |
23 | 24 | import static org.mockito.Mockito.spy;
|
24 | 25 | import static org.testng.Assert.assertEquals;
|
25 | 26 | import static org.testng.Assert.assertFalse;
|
26 | 27 | import static org.testng.Assert.assertNotEquals;
|
27 | 28 | import static org.testng.Assert.assertTrue;
|
28 | 29 | import static org.testng.Assert.fail;
|
| 30 | +import com.google.common.collect.Sets; |
29 | 31 | import io.netty.util.concurrent.FastThreadLocalThread;
|
30 | 32 | import java.lang.reflect.Field;
|
31 | 33 | import java.lang.reflect.Method;
|
32 | 34 | import java.time.Duration;
|
33 | 35 | import java.util.Arrays;
|
34 | 36 | import java.util.Optional;
|
| 37 | +import java.util.UUID; |
35 | 38 | import java.util.concurrent.CompletableFuture;
|
36 | 39 | import java.util.concurrent.CountDownLatch;
|
37 | 40 | import java.util.concurrent.TimeUnit;
|
|
48 | 51 | import org.apache.bookkeeper.mledger.impl.ManagedCursorImpl;
|
49 | 52 | import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl;
|
50 | 53 | import org.apache.pulsar.broker.BrokerTestUtil;
|
| 54 | +import org.apache.pulsar.broker.service.persistent.GeoPersistentReplicator; |
51 | 55 | import org.apache.pulsar.broker.service.persistent.PersistentReplicator;
|
52 | 56 | import org.apache.pulsar.broker.service.persistent.PersistentTopic;
|
53 | 57 | import org.apache.pulsar.client.api.Consumer;
|
@@ -492,4 +496,166 @@ public void testPartitionedTopicLevelReplicationRemoteConflictTopicExist() throw
|
492 | 496 | admin1.topics().deletePartitionedTopic(topicName);
|
493 | 497 | admin2.topics().deletePartitionedTopic(topicName);
|
494 | 498 | }
|
| 499 | + |
| 500 | + /** |
| 501 | + * See the description and execution flow: https://github.com/apache/pulsar/pull/21948. |
| 502 | + * Steps: |
| 503 | + * 1.Create topic, does not enable replication now. |
| 504 | + * - The topic will be loaded in the memory. |
| 505 | + * 2.Enable namespace level replication. |
| 506 | + * - Broker creates a replicator, and the internal producer of replicator is starting. |
| 507 | + * - We inject an error to make the internal producer fail to connect,after few seconds, it will retry to start. |
| 508 | + * 3.Unload bundle. |
| 509 | + * - Starting to close the topic. |
| 510 | + * - The replicator will be closed, but it will not close the internal producer, because the producer has not |
| 511 | + * been created successfully. |
| 512 | + * - We inject a sleeping into the progress of closing the "repl.cursor" to make it stuck. So the topic is still |
| 513 | + * in the process of being closed now. |
| 514 | + * 4.Internal producer retry to connect. |
| 515 | + * - At the next retry, it connected successful. Since the state of "repl.cursor" is not "Closed", this producer |
| 516 | + * will not be closed now. |
| 517 | + * 5.Topic closed. |
| 518 | + * - Cancel the stuck of closing the "repl.cursor". |
| 519 | + * - The topic is wholly closed. |
| 520 | + * 6.Verify: the delayed created internal producer will be closed. In other words, there is no producer is connected |
| 521 | + * to the remote cluster. |
| 522 | + */ |
| 523 | + @Test |
| 524 | + public void testConcurrencyOfUnloadBundleAndRecreateProducer2() throws Exception { |
| 525 | + final String namespaceName = defaultTenant + "/" + UUID.randomUUID().toString().replaceAll("-", ""); |
| 526 | + final String topicName = BrokerTestUtil.newUniqueName("persistent://" + namespaceName + "/tp_"); |
| 527 | + // 1.Create topic, does not enable replication now. |
| 528 | + admin1.namespaces().createNamespace(namespaceName); |
| 529 | + admin2.namespaces().createNamespace(namespaceName); |
| 530 | + admin1.topics().createNonPartitionedTopic(topicName); |
| 531 | + PersistentTopic persistentTopic = |
| 532 | + (PersistentTopic) pulsar1.getBrokerService().getTopic(topicName, false).join().get(); |
| 533 | + |
| 534 | + // We inject an error to make the internal producer fail to connect. |
| 535 | + // The delay time of next retry to create producer is below: |
| 536 | + // 0.1s, 0.2, 0.4, 0.8, 1.6s, 3.2s, 6.4s... |
| 537 | + // If the retry counter is larger than 6, the next creation will be slow enough to close Replicator. |
| 538 | + final AtomicInteger createProducerCounter = new AtomicInteger(); |
| 539 | + final int failTimes = 6; |
| 540 | + injectMockReplicatorProducerBuilder((producerCnf, originalProducer) -> { |
| 541 | + if (topicName.equals(producerCnf.getTopicName())) { |
| 542 | + // There is a switch to determine create producer successfully or not. |
| 543 | + if (createProducerCounter.incrementAndGet() > failTimes) { |
| 544 | + return originalProducer; |
| 545 | + } |
| 546 | + log.info("Retry create replicator.producer count: {}", createProducerCounter); |
| 547 | + // Release producer and fail callback. |
| 548 | + originalProducer.closeAsync(); |
| 549 | + throw new RuntimeException("mock error"); |
| 550 | + } |
| 551 | + return originalProducer; |
| 552 | + }); |
| 553 | + |
| 554 | + // 2.Enable namespace level replication. |
| 555 | + admin1.namespaces().setNamespaceReplicationClusters(namespaceName, Sets.newHashSet(cluster1, cluster2)); |
| 556 | + AtomicReference<PersistentReplicator> replicator = new AtomicReference<PersistentReplicator>(); |
| 557 | + Awaitility.await().untilAsserted(() -> { |
| 558 | + assertFalse(persistentTopic.getReplicators().isEmpty()); |
| 559 | + replicator.set( |
| 560 | + (PersistentReplicator) persistentTopic.getReplicators().values().iterator().next()); |
| 561 | + // Since we inject a producer creation error, the replicator can not start successfully. |
| 562 | + assertFalse(replicator.get().isConnected()); |
| 563 | + }); |
| 564 | + |
| 565 | + // We inject a sleeping into the progress of closing the "repl.cursor" to make it stuck, until the internal |
| 566 | + // producer of the replicator started. |
| 567 | + SpyCursor spyCursor = |
| 568 | + spyCursor(persistentTopic, "pulsar.repl." + pulsar2.getConfig().getClusterName()); |
| 569 | + CursorCloseSignal cursorCloseSignal = makeCursorClosingDelay(spyCursor); |
| 570 | + |
| 571 | + // 3.Unload bundle: call "topic.close(false)". |
| 572 | + // Stuck start new producer, until the state of replicator change to Stopped. |
| 573 | + // The next once of "createProducerSuccessAfterFailTimes" to create producer will be successfully. |
| 574 | + Awaitility.await().pollInterval(Duration.ofMillis(100)).atMost(Duration.ofSeconds(60)).untilAsserted(() -> { |
| 575 | + assertTrue(createProducerCounter.get() >= failTimes); |
| 576 | + }); |
| 577 | + CompletableFuture<Void> topicCloseFuture = persistentTopic.close(true); |
| 578 | + Awaitility.await().atMost(Duration.ofSeconds(30)).untilAsserted(() -> { |
| 579 | + String state = String.valueOf(replicator.get().getState()); |
| 580 | + log.error("replicator state: {}", state); |
| 581 | + assertTrue(state.equals("Disconnected") || state.equals("Terminated")); |
| 582 | + }); |
| 583 | + |
| 584 | + // 5.Delay close cursor, until "replicator.producer" create successfully. |
| 585 | + // The next once retry time of create "replicator.producer" will be 3.2s. |
| 586 | + Thread.sleep(4 * 1000); |
| 587 | + log.info("Replicator.state: {}", replicator.get().getState()); |
| 588 | + cursorCloseSignal.startClose(); |
| 589 | + cursorCloseSignal.startCallback(); |
| 590 | + // Wait for topic close successfully. |
| 591 | + topicCloseFuture.join(); |
| 592 | + |
| 593 | + // 6. Verify there is no orphan producer on the remote cluster. |
| 594 | + Awaitility.await().pollInterval(Duration.ofSeconds(1)).untilAsserted(() -> { |
| 595 | + PersistentTopic persistentTopic2 = |
| 596 | + (PersistentTopic) pulsar2.getBrokerService().getTopic(topicName, false).join().get(); |
| 597 | + assertEquals(persistentTopic2.getProducers().size(), 0); |
| 598 | + Assert.assertFalse(replicator.get().isConnected()); |
| 599 | + }); |
| 600 | + |
| 601 | + // cleanup. |
| 602 | + cleanupTopics(namespaceName, () -> { |
| 603 | + admin1.topics().delete(topicName); |
| 604 | + admin2.topics().delete(topicName); |
| 605 | + }); |
| 606 | + admin1.namespaces().setNamespaceReplicationClusters(namespaceName, Sets.newHashSet(cluster1)); |
| 607 | + admin1.namespaces().deleteNamespace(namespaceName); |
| 608 | + admin2.namespaces().deleteNamespace(namespaceName); |
| 609 | + } |
| 610 | + |
| 611 | + @Test |
| 612 | + public void testUnFenceTopicToReuse() throws Exception { |
| 613 | + final String topicName = BrokerTestUtil.newUniqueName("persistent://" + replicatedNamespace + "/tp"); |
| 614 | + // Wait for replicator started. |
| 615 | + Producer<String> producer1 = client1.newProducer(Schema.STRING).topic(topicName).create(); |
| 616 | + waitReplicatorStarted(topicName); |
| 617 | + |
| 618 | + // Inject an error to make topic close fails. |
| 619 | + final String mockProducerName = UUID.randomUUID().toString(); |
| 620 | + final org.apache.pulsar.broker.service.Producer mockProducer = |
| 621 | + mock(org.apache.pulsar.broker.service.Producer.class); |
| 622 | + doAnswer(invocation -> CompletableFuture.failedFuture(new RuntimeException("mocked error"))) |
| 623 | + .when(mockProducer).disconnect(any()); |
| 624 | + doAnswer(invocation -> CompletableFuture.failedFuture(new RuntimeException("mocked error"))) |
| 625 | + .when(mockProducer).disconnect(); |
| 626 | + PersistentTopic persistentTopic = |
| 627 | + (PersistentTopic) pulsar1.getBrokerService().getTopic(topicName, false).join().get(); |
| 628 | + persistentTopic.getProducers().put(mockProducerName, mockProducer); |
| 629 | + |
| 630 | + // Do close. |
| 631 | + GeoPersistentReplicator replicator1 = |
| 632 | + (GeoPersistentReplicator) persistentTopic.getReplicators().values().iterator().next(); |
| 633 | + try { |
| 634 | + persistentTopic.close(true, false).join(); |
| 635 | + fail("Expected close fails due to a producer close fails"); |
| 636 | + } catch (Exception ex) { |
| 637 | + log.info("Expected error: {}", ex.getMessage()); |
| 638 | + } |
| 639 | + |
| 640 | + // Broker will call `topic.unfenceTopicToResume` if close clients fails. |
| 641 | + // Verify: the replicator will be re-created. |
| 642 | + Awaitility.await().untilAsserted(() -> { |
| 643 | + assertTrue(producer1.isConnected()); |
| 644 | + GeoPersistentReplicator replicator2 = |
| 645 | + (GeoPersistentReplicator) persistentTopic.getReplicators().values().iterator().next(); |
| 646 | + assertNotEquals(replicator1, replicator2); |
| 647 | + assertFalse(replicator1.isConnected()); |
| 648 | + assertFalse(replicator1.producer != null && replicator1.producer.isConnected()); |
| 649 | + assertTrue(replicator2.isConnected()); |
| 650 | + assertTrue(replicator2.producer != null && replicator2.producer.isConnected()); |
| 651 | + }); |
| 652 | + |
| 653 | + // cleanup. |
| 654 | + persistentTopic.getProducers().remove(mockProducerName, mockProducer); |
| 655 | + producer1.close(); |
| 656 | + cleanupTopics(() -> { |
| 657 | + admin1.topics().delete(topicName); |
| 658 | + admin2.topics().delete(topicName); |
| 659 | + }); |
| 660 | + } |
495 | 661 | }
|
0 commit comments