Skip to content

Commit 274c986

Browse files
authored
Merge pull request #17155 from opensourcerouting/memstats-zlog
lib: `debug memstats-at-exit` improvements
2 parents be3b97d + b3e4007 commit 274c986

File tree

11 files changed

+129
-51
lines changed

11 files changed

+129
-51
lines changed

lib/libfrr.c

+16-25
Original file line numberDiff line numberDiff line change
@@ -1239,10 +1239,6 @@ void frr_early_fini(void)
12391239

12401240
void frr_fini(void)
12411241
{
1242-
FILE *fp;
1243-
char filename[128];
1244-
int have_leftovers = 0;
1245-
12461242
hook_call(frr_fini);
12471243

12481244
vty_terminate();
@@ -1263,32 +1259,27 @@ void frr_fini(void)
12631259
event_master_free(master);
12641260
master = NULL;
12651261
zlog_tls_buffer_fini();
1266-
zlog_fini();
1262+
1263+
if (0) {
1264+
/* this is intentionally disabled. zlog remains running until
1265+
* exit(), so even the very last item done during shutdown can
1266+
* have its zlog() messages written out.
1267+
*
1268+
* Yes this causes memory leaks. They are explicitly marked
1269+
* with DEFINE_MGROUP_ACTIVEATEXIT, which is only used for
1270+
* log target memory allocations, and excluded from leak
1271+
* reporting at shutdown. This is strongly preferable over
1272+
* just discarding error messages at shutdown.
1273+
*/
1274+
zlog_fini();
1275+
}
1276+
12671277
/* frrmod_init -> nothing needed / hooks */
12681278
rcu_shutdown();
12691279

12701280
frrmod_terminate();
12711281

1272-
/* also log memstats to stderr when stderr goes to a file*/
1273-
if (debug_memstats_at_exit || !isatty(STDERR_FILENO))
1274-
have_leftovers = log_memstats(stderr, di->name);
1275-
1276-
/* in case we decide at runtime that we want exit-memstats for
1277-
* a daemon
1278-
* (only do this if we actually have something to print though)
1279-
*/
1280-
if (!debug_memstats_at_exit || !have_leftovers)
1281-
return;
1282-
1283-
snprintf(filename, sizeof(filename), "/tmp/frr-memstats-%s-%llu-%llu",
1284-
di->name, (unsigned long long)getpid(),
1285-
(unsigned long long)time(NULL));
1286-
1287-
fp = fopen(filename, "w");
1288-
if (fp) {
1289-
log_memstats(fp, di->name);
1290-
fclose(fp);
1291-
}
1282+
log_memstats(di->name, debug_memstats_at_exit);
12921283
}
12931284

12941285
struct json_object *frr_daemon_state_load(void)

lib/log.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ void memory_oom(size_t size, const char *name)
306306
"out of memory: failed to allocate %zu bytes for %s object",
307307
size, name);
308308
zlog_backtrace(LOG_CRIT);
309-
log_memstats(stderr, "log");
309+
log_memstats(zlog_progname, true);
310310
abort();
311311
}
312312

lib/memory.c

+88-15
Original file line numberDiff line numberDiff line change
@@ -148,35 +148,108 @@ int qmem_walk(qmem_walk_fn *func, void *arg)
148148
}
149149

150150
struct exit_dump_args {
151-
FILE *fp;
152-
const char *prefix;
151+
const char *daemon_name;
152+
bool do_log;
153+
bool do_file;
154+
bool do_stderr;
153155
int error;
156+
FILE *fp;
157+
struct memgroup *last_mg;
154158
};
155159

160+
static void qmem_exit_fopen(struct exit_dump_args *eda)
161+
{
162+
char filename[128];
163+
164+
if (eda->fp || !eda->do_file || !eda->daemon_name)
165+
return;
166+
167+
snprintf(filename, sizeof(filename), "/tmp/frr-memstats-%s-%llu-%llu", eda->daemon_name,
168+
(unsigned long long)getpid(), (unsigned long long)time(NULL));
169+
eda->fp = fopen(filename, "w");
170+
171+
if (!eda->fp) {
172+
zlog_err("failed to open memstats dump file %pSQq: %m", filename);
173+
/* don't try opening file over and over again */
174+
eda->do_file = false;
175+
}
176+
}
177+
156178
static int qmem_exit_walker(void *arg, struct memgroup *mg, struct memtype *mt)
157179
{
158180
struct exit_dump_args *eda = arg;
181+
const char *prefix = eda->daemon_name ?: "NONE";
182+
char size[32];
183+
184+
if (!mt)
185+
/* iterator calls mg=X, mt=NULL first */
186+
return 0;
159187

160-
if (!mt) {
161-
fprintf(eda->fp,
162-
"%s: showing active allocations in memory group %s\n",
163-
eda->prefix, mg->name);
188+
if (!mt->n_alloc)
189+
return 0;
164190

165-
} else if (mt->n_alloc) {
166-
char size[32];
167-
if (!mg->active_at_exit)
168-
eda->error++;
191+
if (mt->size != SIZE_VAR)
169192
snprintf(size, sizeof(size), "%10zu", mt->size);
170-
fprintf(eda->fp, "%s: memstats: %-30s: %6zu * %s\n",
171-
eda->prefix, mt->name, mt->n_alloc,
172-
mt->size == SIZE_VAR ? "(variably sized)" : size);
193+
else
194+
snprintf(size, sizeof(size), "(variably sized)");
195+
196+
if (mg->active_at_exit) {
197+
/* not an error - this memgroup has allocations remain active
198+
* at exit. Only printed to zlog_debug.
199+
*/
200+
if (!eda->do_log)
201+
return 0;
202+
203+
if (eda->last_mg != mg) {
204+
zlog_debug("showing active allocations in memory group %s (not an error)",
205+
mg->name);
206+
eda->last_mg = mg;
207+
}
208+
zlog_debug("memstats: %-30s: %6zu * %s", mt->name, mt->n_alloc, size);
209+
return 0;
173210
}
211+
212+
eda->error++;
213+
if (eda->do_file)
214+
qmem_exit_fopen(eda);
215+
216+
if (eda->last_mg != mg) {
217+
if (eda->do_log)
218+
zlog_warn("showing active allocations in memory group %s", mg->name);
219+
if (eda->do_stderr)
220+
fprintf(stderr, "%s: showing active allocations in memory group %s\n",
221+
prefix, mg->name);
222+
if (eda->fp)
223+
fprintf(eda->fp, "%s: showing active allocations in memory group %s\n",
224+
prefix, mg->name);
225+
eda->last_mg = mg;
226+
}
227+
228+
if (eda->do_log)
229+
zlog_warn("memstats: %-30s: %6zu * %s", mt->name, mt->n_alloc, size);
230+
if (eda->do_stderr)
231+
fprintf(stderr, "%s: memstats: %-30s: %6zu * %s\n", prefix, mt->name, mt->n_alloc,
232+
size);
233+
if (eda->fp)
234+
fprintf(eda->fp, "%s: memstats: %-30s: %6zu * %s\n", prefix, mt->name, mt->n_alloc,
235+
size);
174236
return 0;
175237
}
176238

177-
int log_memstats(FILE *fp, const char *prefix)
239+
int log_memstats(const char *daemon_name, bool enabled)
178240
{
179-
struct exit_dump_args eda = {.fp = fp, .prefix = prefix, .error = 0};
241+
struct exit_dump_args eda = {
242+
.daemon_name = daemon_name,
243+
.do_log = enabled,
244+
.do_file = enabled,
245+
.do_stderr = enabled || !isatty(STDERR_FILENO),
246+
.error = 0,
247+
};
248+
180249
qmem_walk(qmem_exit_walker, &eda);
250+
if (eda.fp)
251+
fclose(eda.fp);
252+
if (eda.error && eda.do_log)
253+
zlog_warn("exiting with %d leaked MTYPEs", eda.error);
181254
return eda.error;
182255
}

lib/memory.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ struct memgroup {
6767
* but MGROUP_* aren't.
6868
*/
6969

70+
/* clang-format off */
71+
7072
#define DECLARE_MGROUP(name) extern struct memgroup _mg_##name
7173
#define _DEFINE_MGROUP(mname, desc, ...) \
7274
struct memgroup _mg_##mname _DATA_SECTION("mgroups") = { \
@@ -75,6 +77,7 @@ struct memgroup {
7577
.next = NULL, \
7678
.insert = NULL, \
7779
.ref = NULL, \
80+
__VA_ARGS__ \
7881
}; \
7982
static void _mginit_##mname(void) __attribute__((_CONSTRUCTOR(1000))); \
8083
static void _mginit_##mname(void) \
@@ -136,6 +139,8 @@ struct memgroup {
136139
DEFINE_MTYPE_ATTR(group, name, static, desc) \
137140
/* end */
138141

142+
/* clang-format on */
143+
139144
DECLARE_MGROUP(LIB);
140145
DECLARE_MTYPE(TMP);
141146
DECLARE_MTYPE(TMP_TTABLE);
@@ -176,8 +181,7 @@ static inline size_t mtype_stats_alloc(struct memtype *mt)
176181
* last value from qmem_walk_fn. */
177182
typedef int qmem_walk_fn(void *arg, struct memgroup *mg, struct memtype *mt);
178183
extern int qmem_walk(qmem_walk_fn *func, void *arg);
179-
extern int log_memstats(FILE *fp, const char *);
180-
#define log_memstats_stderr(prefix) log_memstats(stderr, prefix)
184+
extern int log_memstats(const char *daemon_name, bool enabled);
181185

182186
extern __attribute__((__noreturn__)) void memory_oom(size_t size,
183187
const char *name);

lib/sigevent.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,18 @@ core_handler(int signo, siginfo_t *siginfo, void *context)
237237

238238
zlog_signal(signo, "aborting...", siginfo, pc);
239239

240-
/* dump memory stats on core */
241-
log_memstats(stderr, "core_handler");
240+
/* there used to be a log_memstats() call here, to dump MTYPE counters
241+
* on a coredump. This is not possible since log_memstats is not
242+
* AS-Safe, as it calls fopen(), fprintf(), and cousins. This can
243+
* lead to a deadlock depending on where we crashed - very much not a
244+
* good thing if the process just hangs there after a crash.
245+
*
246+
* The alarm(1) above tries to alleviate this, but that's really a
247+
* last resort recovery. Stick with AS-safe calls here.
248+
*
249+
* If the fprintf() calls are removed from log_memstats(), this can be
250+
* added back in, since writing to log with zlog_sigsafe() is AS-safe.
251+
*/
242252

243253
/*
244254
* This is a buffer flush because FRR is going down

tests/isisd/test_fuzz_isis_tlv.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static bool atexit_registered;
2222

2323
static void show_meminfo_at_exit(void)
2424
{
25-
log_memstats(stderr, "isis fuzztest");
25+
log_memstats(NULL, true);
2626
}
2727

2828
static int comp_line(const void *p1, const void *p2)

tests/isisd/test_isis_spf.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ static void vty_do_exit(int isexit)
475475
yang_terminate();
476476
event_master_free(master);
477477

478-
log_memstats(stderr, "test-isis-spf");
478+
log_memstats(NULL, true);
479479
if (!isexit)
480480
exit(0);
481481
}

tests/lib/cli/common_cli.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ static void vty_do_exit(int isexit)
4343
yang_terminate();
4444
event_master_free(master);
4545

46-
log_memstats(stderr, "testcli");
46+
log_memstats(NULL, true);
4747
if (!isexit)
4848
exit(0);
4949
}

tests/lib/northbound/test_oper_data.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ static void vty_do_exit(int isexit)
427427
yang_terminate();
428428
event_master_free(master);
429429

430-
log_memstats(stderr, "test-nb-oper-data");
430+
log_memstats(NULL, true);
431431
if (!isexit)
432432
exit(0);
433433
}

tests/lib/test_typelist.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,6 @@ int main(int argc, char **argv)
156156
test_ATOMSORT_UNIQ();
157157
test_ATOMSORT_NONUNIQ();
158158

159-
log_memstats_stderr("test: ");
159+
log_memstats(NULL, true);
160160
return 0;
161161
}

tests/lib/test_zmq.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ static void run_server(int syncfd)
285285
zmq_close(zmqsock);
286286
frrzmq_finish();
287287
event_master_free(master);
288-
log_memstats_stderr("test");
288+
log_memstats(NULL, true);
289289
}
290290

291291
int main(void)

0 commit comments

Comments
 (0)