Skip to content

Commit 7a6247c

Browse files
feat(lightpush): introduce ReliabilityMonitor and allow send retries (#2130)
* chore: restructure reliability monitors * feat: setup sender monitor * chore: update tests * chore: minor fixes * chore: comment for doc
1 parent 7ad1d32 commit 7a6247c

File tree

9 files changed

+172
-71
lines changed

9 files changed

+172
-71
lines changed

packages/sdk/src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ export {
1414
defaultLibp2p,
1515
createLibp2pAndUpdateOptions
1616
} from "./create/index.js";
17-
export { wakuLightPush } from "./protocols/light_push.js";
17+
export { wakuLightPush } from "./protocols/lightpush/index.js";
1818
export { wakuFilter } from "./protocols/filter/index.js";
19-
export { wakuStore } from "./protocols/store.js";
19+
export { wakuStore } from "./protocols/store/index.js";
2020

2121
export * as waku from "@waku/core";
2222
export * as utils from "@waku/utils";

packages/sdk/src/protocols/filter/subscription_manager.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ import {
2020
import { WakuMessage } from "@waku/proto";
2121
import { groupByContentTopic, Logger } from "@waku/utils";
2222

23+
import { ReliabilityMonitorManager } from "../../reliability_monitor/index.js";
24+
import { ReceiverReliabilityMonitor } from "../../reliability_monitor/receiver.js";
25+
2326
import { DEFAULT_KEEP_ALIVE, DEFAULT_SUBSCRIBE_OPTIONS } from "./constants.js";
24-
import {
25-
ReceiverReliabilityMonitor,
26-
ReliabilityMonitorManager
27-
} from "./reliability_monitor.js";
2827

2928
const log = new Logger("sdk:filter:subscription_manager");
3029

packages/sdk/src/protocols/light_push.ts renamed to packages/sdk/src/protocols/lightpush/index.ts

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@ import {
1313
} from "@waku/interfaces";
1414
import { ensurePubsubTopicIsConfigured, Logger } from "@waku/utils";
1515

16-
import { BaseProtocolSDK } from "./base_protocol.js";
16+
import { ReliabilityMonitorManager } from "../../reliability_monitor/index.js";
17+
import { SenderReliabilityMonitor } from "../../reliability_monitor/sender.js";
18+
import { BaseProtocolSDK } from "../base_protocol.js";
1719

1820
const log = new Logger("sdk:light-push");
1921

2022
class LightPushSDK extends BaseProtocolSDK implements ILightPushSDK {
2123
public readonly protocol: LightPushCore;
2224

25+
private readonly reliabilityMonitor: SenderReliabilityMonitor;
26+
2327
public constructor(
2428
connectionManager: ConnectionManager,
2529
libp2p: Libp2p,
@@ -33,6 +37,10 @@ class LightPushSDK extends BaseProtocolSDK implements ILightPushSDK {
3337
}
3438
);
3539

40+
this.reliabilityMonitor = ReliabilityMonitorManager.createSenderMonitor(
41+
this.renewPeer.bind(this)
42+
);
43+
3644
this.protocol = this.core as LightPushCore;
3745
}
3846

@@ -89,16 +97,23 @@ class LightPushSDK extends BaseProtocolSDK implements ILightPushSDK {
8997
successes.push(success);
9098
}
9199
if (failure) {
100+
failures.push(failure);
92101
if (failure.peerId) {
93-
try {
94-
await this.renewPeer(failure.peerId);
95-
log.info("Renewed peer", failure.peerId.toString());
96-
} catch (error) {
97-
log.error("Failed to renew peer", error);
102+
const peer = this.connectedPeers.find((connectedPeer) =>
103+
connectedPeer.id.equals(failure.peerId)
104+
);
105+
if (peer) {
106+
log.info(`
107+
Failed to send message to peer ${failure.peerId}.
108+
Retrying the message with the same peer in the background.
109+
If this fails, the peer will be renewed.
110+
`);
111+
void this.reliabilityMonitor.attemptRetriesOrRenew(
112+
failure.peerId,
113+
() => this.protocol.send(encoder, message, peer)
114+
);
98115
}
99116
}
100-
101-
failures.push(failure);
102117
}
103118
} else {
104119
log.error("Failed unexpectedly while sending:", result.reason);

packages/sdk/src/protocols/store.ts renamed to packages/sdk/src/protocols/store/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import {
1010
import { messageHash } from "@waku/message-hash";
1111
import { ensurePubsubTopicIsConfigured, isDefined, Logger } from "@waku/utils";
1212

13-
import { BaseProtocolSDK } from "./base_protocol.js";
13+
import { BaseProtocolSDK } from "../base_protocol.js";
1414

1515
const DEFAULT_NUM_PEERS = 1;
1616

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import type { Peer, PeerId } from "@libp2p/interface";
2+
import {
3+
ContentTopic,
4+
CoreProtocolResult,
5+
PubsubTopic
6+
} from "@waku/interfaces";
7+
8+
import { ReceiverReliabilityMonitor } from "./receiver.js";
9+
import { SenderReliabilityMonitor } from "./sender.js";
10+
11+
/**
 * Static registry for the SDK's reliability monitors.
 *
 * Holds one `ReceiverReliabilityMonitor` per pubsub topic and a single shared
 * `SenderReliabilityMonitor`. The class is never instantiated; everything is
 * reached through its static methods.
 */
export class ReliabilityMonitorManager {
  private static receiverMonitors: Map<
    PubsubTopic,
    ReceiverReliabilityMonitor
  > = new Map();
  private static senderMonitor: SenderReliabilityMonitor | undefined;

  /**
   * Returns the receiver monitor registered for `pubsubTopic`, creating and
   * registering one from the supplied callbacks if none exists yet.
   *
   * When a monitor is already registered for the topic it is returned as-is
   * and the callbacks passed to this call are ignored.
   */
  public static createReceiverMonitor(
    pubsubTopic: PubsubTopic,
    getPeers: () => Peer[],
    renewPeer: (peerId: PeerId) => Promise<Peer>,
    getContentTopics: () => ContentTopic[],
    protocolSubscribe: (
      pubsubTopic: PubsubTopic,
      peer: Peer,
      contentTopics: ContentTopic[]
    ) => Promise<CoreProtocolResult>
  ): ReceiverReliabilityMonitor {
    // Single lookup instead of has() + get()!, so no non-null assertion is needed.
    const existing =
      ReliabilityMonitorManager.receiverMonitors.get(pubsubTopic);
    if (existing) {
      return existing;
    }

    const monitor = new ReceiverReliabilityMonitor(
      pubsubTopic,
      getPeers,
      renewPeer,
      getContentTopics,
      protocolSubscribe
    );
    ReliabilityMonitorManager.receiverMonitors.set(pubsubTopic, monitor);
    return monitor;
  }

  /**
   * Returns the shared sender monitor, creating it on first use.
   *
   * The monitor is a singleton: once created, later calls return the same
   * instance and their `renewPeer` argument is ignored until the monitor is
   * cleared by `stop` or `stopAll`.
   */
  public static createSenderMonitor(
    renewPeer: (peerId: PeerId) => Promise<Peer>
  ): SenderReliabilityMonitor {
    if (!ReliabilityMonitorManager.senderMonitor) {
      ReliabilityMonitorManager.senderMonitor = new SenderReliabilityMonitor(
        renewPeer
      );
    }
    return ReliabilityMonitorManager.senderMonitor;
  }

  private constructor() {}

  /**
   * Unregisters the receiver monitor for `pubsubTopic` and drops the shared
   * sender monitor.
   */
  public static stop(pubsubTopic: PubsubTopic): void {
    ReliabilityMonitorManager.receiverMonitors.delete(pubsubTopic);
    // NOTE(review): the sender monitor is not scoped to a pubsub topic, yet it
    // is dropped here as well. Kept as-is for compatibility; confirm whether
    // stopping a single topic is really meant to reset the sender side.
    ReliabilityMonitorManager.senderMonitor = undefined;
  }

  /**
   * Resets the thresholds of every registered receiver monitor, unregisters
   * them all, and drops the shared sender monitor.
   */
  public static stopAll(): void {
    for (const monitor of ReliabilityMonitorManager.receiverMonitors.values()) {
      monitor.setMaxMissedMessagesThreshold(undefined);
      monitor.setMaxPingFailures(undefined);
    }
    ReliabilityMonitorManager.receiverMonitors.clear();
    // Must happen outside the loop: otherwise the sender monitor survives
    // whenever no receiver monitor happens to be registered.
    ReliabilityMonitorManager.senderMonitor = undefined;
  }
}

packages/sdk/src/protocols/filter/reliability_monitor.ts renamed to packages/sdk/src/reliability_monitor/receiver.ts

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -21,53 +21,6 @@ const log = new Logger("sdk:receiver:reliability_monitor");
2121

2222
const DEFAULT_MAX_PINGS = 3;
2323

24-
export class ReliabilityMonitorManager {
25-
private static receiverMonitors: Map<
26-
PubsubTopic,
27-
ReceiverReliabilityMonitor
28-
> = new Map();
29-
30-
public static createReceiverMonitor(
31-
pubsubTopic: PubsubTopic,
32-
getPeers: () => Peer[],
33-
renewPeer: (peerId: PeerId) => Promise<Peer>,
34-
getContentTopics: () => ContentTopic[],
35-
protocolSubscribe: (
36-
pubsubTopic: PubsubTopic,
37-
peer: Peer,
38-
contentTopics: ContentTopic[]
39-
) => Promise<CoreProtocolResult>
40-
): ReceiverReliabilityMonitor {
41-
if (ReliabilityMonitorManager.receiverMonitors.has(pubsubTopic)) {
42-
return ReliabilityMonitorManager.receiverMonitors.get(pubsubTopic)!;
43-
}
44-
45-
const monitor = new ReceiverReliabilityMonitor(
46-
pubsubTopic,
47-
getPeers,
48-
renewPeer,
49-
getContentTopics,
50-
protocolSubscribe
51-
);
52-
ReliabilityMonitorManager.receiverMonitors.set(pubsubTopic, monitor);
53-
return monitor;
54-
}
55-
56-
private constructor() {}
57-
58-
public static destroy(pubsubTopic: PubsubTopic): void {
59-
this.receiverMonitors.delete(pubsubTopic);
60-
}
61-
62-
public static destroyAll(): void {
63-
for (const [pubsubTopic, monitor] of this.receiverMonitors) {
64-
monitor.setMaxMissedMessagesThreshold(undefined);
65-
monitor.setMaxPingFailures(undefined);
66-
this.receiverMonitors.delete(pubsubTopic);
67-
}
68-
}
69-
}
70-
7124
export class ReceiverReliabilityMonitor {
7225
private receivedMessagesHashes: ReceivedMessageHashes;
7326
private missedMessagesByPeer: Map<string, number> = new Map();
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import type { Peer, PeerId } from "@libp2p/interface";
2+
import { CoreProtocolResult, PeerIdStr } from "@waku/interfaces";
3+
import { Logger } from "@waku/utils";
4+
5+
const log = new Logger("sdk:sender:reliability_monitor");
6+
7+
const DEFAULT_MAX_ATTEMPTS_BEFORE_RENEWAL = 3;
8+
9+
/**
 * Retries failed sends against a peer and renews the peer once too many
 * attempts have failed.
 *
 * Attempts are counted per peer id (not per message), so concurrent failures
 * against the same peer share one counter.
 */
export class SenderReliabilityMonitor {
  private attempts: Map<PeerIdStr, number> = new Map();
  private readonly maxAttemptsBeforeRenewal =
    DEFAULT_MAX_ATTEMPTS_BEFORE_RENEWAL;

  /**
   * @param renewPeer Callback that replaces the given peer and resolves to
   * its replacement.
   */
  public constructor(
    private readonly renewPeer: (peerId: PeerId) => Promise<Peer>
  ) {}

  /**
   * Registers one more failed attempt for `peerId` and then either retries
   * `protocolSend` or, once the attempt count reaches the maximum, renews the
   * peer and calls `protocolSend` one final time.
   *
   * Failures are logged rather than thrown: every `protocolSend` and
   * `renewPeer` call is wrapped, so fire-and-forget callers do not need to
   * attach a rejection handler.
   *
   * @param peerId Peer the original send failed against.
   * @param protocolSend Re-runs the send and resolves to its result.
   */
  public async attemptRetriesOrRenew(
    peerId: PeerId,
    protocolSend: () => Promise<CoreProtocolResult>
  ): Promise<void> {
    const peerIdStr = peerId.toString();
    const attempt = (this.attempts.get(peerIdStr) ?? 0) + 1;
    this.attempts.set(peerIdStr, attempt);

    if (attempt >= this.maxAttemptsBeforeRenewal) {
      await this.renewAndResend(peerId, protocolSend);
      return;
    }

    try {
      const result = await protocolSend();
      if (result.success) {
        log.info(`Successfully sent message after retry to ${peerIdStr}`);
        this.attempts.delete(peerIdStr);
        return;
      }
      // Pass the failure as its own argument: interpolating the object into
      // the template string would only print "[object Object]".
      log.error(
        `Failed to send message after retry to ${peerIdStr}:`,
        result.failure
      );
    } catch (error) {
      log.error(`Failed to send message after retry to ${peerIdStr}: ${error}`);
    }

    // Reached only when the retry failed or threw; the next call bumps the
    // counter and eventually falls through to renewal.
    await this.attemptRetriesOrRenew(peerId, protocolSend);
  }

  /**
   * Renews `peerId` and, if that succeeds, calls `protocolSend` once more.
   * A failed renewal skips the send.
   */
  private async renewAndResend(
    peerId: PeerId,
    protocolSend: () => Promise<CoreProtocolResult>
  ): Promise<void> {
    const peerIdStr = peerId.toString();

    try {
      const newPeer = await this.renewPeer(peerId);
      log.info(`Renewed peer ${peerIdStr} to ${newPeer.id.toString()}`);
      // The old peer's counter is dropped. No entry is created for the new
      // peer: a missing entry already counts as zero attempts, and adding one
      // per renewal would grow the map without bound.
      this.attempts.delete(peerIdStr);
    } catch (error) {
      log.error(`Failed to renew peer ${peerIdStr}: ${error}`);
      return;
    }

    // NOTE(review): `protocolSend` is opaque here. If the caller bound it to
    // the peer that was just replaced, this send still targets the old peer —
    // confirm at the call sites.
    try {
      const result = await protocolSend();
      if (!result.success) {
        log.error(
          `Failed to send message after renewing peer ${peerIdStr}:`,
          result.failure
        );
      }
    } catch (error) {
      // Logged separately so a send failure is not reported as a renewal failure.
      log.error(
        `Failed to send message after renewing peer ${peerIdStr}: ${error}`
      );
    }
  }
}

packages/sdk/src/waku.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ import { Protocols } from "@waku/interfaces";
1717
import { Logger } from "@waku/utils";
1818

1919
import { wakuFilter } from "./protocols/filter/index.js";
20-
import { ReliabilityMonitorManager } from "./protocols/filter/reliability_monitor.js";
21-
import { wakuLightPush } from "./protocols/light_push.js";
22-
import { wakuStore } from "./protocols/store.js";
20+
import { wakuLightPush } from "./protocols/lightpush/index.js";
21+
import { wakuStore } from "./protocols/store/index.js";
22+
import { ReliabilityMonitorManager } from "./reliability_monitor/index.js";
2323

2424
export const DefaultPingKeepAliveValueSecs = 5 * 60;
2525
export const DefaultRelayKeepAliveValueSecs = 5 * 60;
@@ -196,7 +196,7 @@ export class WakuNode implements Waku {
196196
}
197197

198198
public async stop(): Promise<void> {
199-
ReliabilityMonitorManager.destroyAll();
199+
ReliabilityMonitorManager.stopAll();
200200
this.connectionManager.stop();
201201
await this.libp2p.stop();
202202
}

packages/tests/tests/light-push/peer_management.spec.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { LightNode } from "@waku/interfaces";
22
import { createEncoder, utf8ToBytes } from "@waku/sdk";
3+
import { delay } from "@waku/utils";
34
import { expect } from "chai";
45
import { describe } from "mocha";
56

@@ -78,18 +79,24 @@ describe("Waku Light Push: Peer Management: E2E", function () {
7879
expect(response2.failures).to.have.length(1);
7980
expect(response2.failures?.[0].peerId).to.equal(peerToDisconnect);
8081

81-
// send another lightpush request -- renewal should have triggerred and new peer should be used instead of the disconnected one
82+
// send another lightpush request
83+
// reattempts to send should be triggered
84+
// then renewal should happen
85+
// so one failure should exist
8286
const response3 = await waku.lightPush.send(encoder, {
8387
payload: utf8ToBytes("Hello_World")
8488
});
8589

90+
// wait for reattempts to finish as they are async and not awaited
91+
await delay(500);
92+
93+
// doing -1 because the peer that was disconnected is not in the successes
8694
expect(response3.successes.length).to.be.equal(
87-
waku.lightPush.numPeersToUse
95+
waku.lightPush.numPeersToUse - 1
8896
);
97+
// and exists in failure instead
98+
expect(response3.failures).to.have.length(1);
8999

90100
expect(response3.successes).to.not.include(peerToDisconnect);
91-
if (response3.failures) {
92-
expect(response3.failures.length).to.equal(0);
93-
}
94101
});
95102
});

0 commit comments

Comments
 (0)