Skip to content

Commit 35a6030

Browse files
Comments update
1 parent 9f7b2a8 commit 35a6030

File tree

3 files changed

+72
-53
lines changed

3 files changed

+72
-53
lines changed

common/events.cpp

+22-35
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,8 @@
11
#include "events_pi.h"
22

3-
/*
4-
* The uuid_unparse() function converts the supplied UUID uu from
5-
* the binary representation into a 36-byte string (plus trailing
6-
* '\0') of the form 1b4e28ba-2fa1-11d2-883f-0016d3cca427 and stores
7-
* this value in the character string pointed to by out.
8-
*/
9-
#define UUID_STR_SIZE 40
10-
11-
/*
12-
* Publisher use echo service and subscriber uses cache service
13-
* The eventd process runs this service, which could be down
14-
* All service interactions being async, a timeout is required
15-
* not to block forever on read.
16-
*
17-
* The unit is in milliseconds in sync with ZMQ_RCVTIMEO of
18-
* zmq_setsockopt.
19-
*
20-
* Publisher uses more to shadow async connectivity from PUB to
21-
* XSUB end point of eventd's proxy. Hene have a less value.
22-
*
23-
* Subscriber uses it for cache management and here we need a
24-
* longer timeout, to handle slow proxy. This timeout value's only
25-
* impact could be subscriber process trying to terminate.
26-
*/
27-
28-
#define EVENTS_SERVICE_TIMEOUT_MS_PUB 200
29-
#define EVENTS_SERVICE_TIMEOUT_MS_SUB 2000
30-
313
/*
324
* Track created publishers to avoid duplicates
33-
* As we track missed event count by publishing instances, avoiding
5+
* As receivers track missed event count by publishing instances, avoiding
346
* duplicates helps reduce load.
357
*/
368

@@ -50,27 +22,33 @@ int EventPublisher::init(const string event_source)
5022
int rc = zmq_connect (sock, get_config(XSUB_END_KEY).c_str());
5123
RET_ON_ERR(rc == 0, "Publisher fails to connect %s", get_config(XSUB_END_KEY).c_str());
5224

53-
// REQ socket is connected and a message is sent & received, more to
54-
// ensure PUB socket had enough time to establish connection.
55-
// Any message published before connection establishment is dropped.
56-
//
5725
/*
5826
* Event service could be down. So have a timeout.
5927
*
6028
*/
6129
rc = m_event_service.init_client(m_zmq_ctx, EVENTS_SERVICE_TIMEOUT_MS_PUB);
6230
RET_ON_ERR (rc == 0, "Failed to init event service");
6331

32+
/*
33+
* REQ socket is connected and a message is sent & received, more to
34+
* ensure PUB socket had enough time to establish connection.
35+
* Any message published before connection establishment is dropped.
36+
* NOTE: We don't wait for response here, but read it upon first publish
37+
* If the publisher init happened early at the start by the caller, by the
38+
* time the first event is published, the echo response will be available locally.
39+
*/
6440
rc = m_event_service.echo_send("hello");
6541
RET_ON_ERR (rc == 0, "Failed to echo send in event service");
6642

6743
m_event_source = event_source;
6844

45+
{
6946
uuid_t id;
7047
char uuid_str[UUID_STR_SIZE];
7148
uuid_generate(id);
7249
uuid_unparse(id, uuid_str);
7350
m_runtime_id = string(uuid_str);
51+
}
7452

7553
m_socket = sock;
7654
out:
@@ -104,9 +82,12 @@ EventPublisher::publish(const string tag, const event_params_t *params)
10482
* as provided in publisher init.
10583
*/
10684
m_event_service.echo_receive(s);
85+
86+
/* Close it as we don't need it anymore */
10787
m_event_service.close_service();
10888
}
10989

90+
/* Check for timestamp in params. If not, provide it. */
11091
string param_str;
11192
event_params_t evt_params;
11293
if (params != NULL) {
@@ -120,6 +101,7 @@ EventPublisher::publish(const string tag, const event_params_t *params)
120101
evt_params[event_ts_param] = get_timestamp();
121102
params = &evt_params;
122103
}
104+
123105
rc = serialize(*params, param_str);
124106
RET_ON_ERR(rc == 0, "failed to serialize params %s",
125107
map_to_str(*params).c_str());
@@ -232,9 +214,12 @@ EventSubscriber::~EventSubscriber()
232214
rc = zmq_message_read(m_socket, ZMQ_DONTWAIT, source, evt_data);
233215
if (rc == -1) {
234216
if (zerrno == EAGAIN) {
235-
rc = 0;
217+
/* Try again after a small pause */
218+
this_thread::sleep_for(chrono::milliseconds(10));
219+
}
220+
else {
221+
break;
236222
}
237-
break;
238223
}
239224
serialize(evt_data, evt_str);
240225
events.push_back(evt_str);
@@ -315,10 +300,12 @@ EventSubscriber::prune_track()
315300
{
316301
map<time_t, vector<runtime_id_t> > lst;
317302

303+
/* Sort entries by last touched time */
318304
for(const auto e: m_track) {
319305
lst[e.second.epoch_secs].push_back(e.first);
320306
}
321307

308+
/* By default it walks from lowest value / earliest timestamp */
322309
map<time_t, vector<runtime_id_t> >::const_iterator itc = lst.begin();
323310
for(; (itc != lst.end()) && (m_track.size() > MAX_PUBLISHERS_COUNT); ++itc) {
324311
for (const auto r: itc->second) {

common/events_pi.h

+29-2
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,7 @@ class EventSubscriber : public events_base
111111
event_service m_event_service;
112112

113113
/*
114-
* Set to true, upon cache read returning non zero count of events
115-
* implying more to read.
114+
* Set to true, if there may be more events to read from cache.
116115
*/
117116
bool m_cache_read;
118117

@@ -157,4 +156,32 @@ class EventSubscriber : public events_base
157156
*/
158157
#define CACHE_DRAIN_IN_MILLISECS 1000
159158

159+
/*
160+
* The uuid_unparse() function converts the supplied UUID uu from
161+
* the binary representation into a 36-byte string (plus trailing
162+
* '\0') of the form 1b4e28ba-2fa1-11d2-883f-0016d3cca427 and stores
163+
* this value in the character string pointed to by out.
164+
*/
165+
#define UUID_STR_SIZE 40
166+
167+
/*
168+
* Publisher uses the echo service and subscriber uses the cache service.
169+
* The eventd process runs this service, which could be down
170+
* All service interactions being async, a timeout is required
171+
* not to block forever on read.
172+
*
173+
* The unit is in milliseconds in sync with ZMQ_RCVTIMEO of
174+
* zmq_setsockopt.
175+
*
176+
* Publisher uses more to shadow async connectivity from PUB to
177+
* XSUB end point of eventd's proxy. Hence have a smaller value.
178+
*
179+
* Subscriber uses it for cache management and here we need a
180+
* longer timeout, to handle slow proxy. This timeout value's only
181+
* impact could be when subscriber process is trying to terminate.
182+
*/
183+
184+
#define EVENTS_SERVICE_TIMEOUT_MS_PUB 200
185+
#define EVENTS_SERVICE_TIMEOUT_MS_SUB 2000
186+
160187
#endif /* !_EVENTS_PI_H */

common/events_service.cpp

+21-16
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,30 @@
11
#include "events_service.h"
22

33
/*
4-
* For brainstorming, if helpful
5-
* The cache messages are read in either direction
6-
* Upon start, the caller gives a set of events read for about 2 seconds
7-
* in non-blocking mode to give it as start stock.
4+
* Cache management
85
*
9-
* Upon cache stop, events collected MAX over a time is read by the caller.
10-
*
11-
* These messages are currently provided as vector list of strings.
12-
* As cache start provided a small subset, it is given as part of start request
13-
* Since cache read can be too many, multiple cache_Read requests are made
14-
* until no more and each returns a subset as vector of strings.
6+
* 1) Caller is expected to call init first, which initiates the connection
7+
* to the capture end point. Being async, it would take some milliseconds
8+
* to connect.
159
*
16-
* Another way, the entire cache in either direction can be sent/received
17-
* via PAIR socket. But this woulkd need a special terminating message to
18-
* indicate read end, as non-blocking read returning no event does not
19-
* necessarily mean end
10+
* 2) Caller starts the cache, optionally with some local cache it may have.
11+
* The cache service keeps it as its startup/initial stock.
12+
* This helps the caller save its local cache with the cache service.
2013
*
21-
* Not sure, what the gain here is vs current approach of vector<string>
22-
* Just a note for now, not to lose a line of possibility.
14+
* 3) Caller calls stop, upon making its connection to the XPUB end.
15+
* As the caller's connect is async, and this zmq end may have some events
16+
* cached locally by ZMQ, read events for a little longer.
17+
*
18+
* 4) Upon stop, the caller may read cached events.
19+
* The events are provided in FIFO order.
20+
* As cached events can be too many, the service returns a few at a time.
21+
* The caller is expected to read repeatedly until no event is returned.
22+
*
23+
* Cache overflow:
24+
* A ceiling is set, though memory may run out before the ceiling is reached.
25+
* In either case, the caching is *not* completely stopped but cached as
26+
* one event per runtime-id/publishing instance. This info is required
27+
* to compute missed message count due to overflow and otherwise.
2328
*/
2429

2530
int

0 commit comments

Comments
 (0)