|
| 1 | +# coding: utf-8 |
| 2 | + |
| 3 | +import tracemalloc |
| 4 | +import logging |
| 5 | +import objgraph |
| 6 | +import re |
| 7 | +import gc |
| 8 | +import os |
| 9 | +import time |
| 10 | +from hubblestack.hec.dq import DiskQueue |
| 11 | + |
| 12 | +__virtualname__ = 'memleak' |
| 13 | +log = logging.getLogger(__virtualname__) |
| 14 | +STATE = dict(growth=True, new_item_refmaps=0) |
| 15 | + |
| 16 | +# if the salt.loader replaces this, fine; if not, /var/cache/hubble is fine too |
| 17 | +__opts__ = { 'cachedir': '/var/cache/hubble' } |
| 18 | + |
| 19 | +def __virtual__(): |
| 20 | + return True |
| 21 | + |
| 22 | +def _now(): |
| 23 | + return int(time.time()) |
| 24 | + |
| 25 | +def _apply_tags(tags, *events): |
| 26 | + if tags is not None: |
| 27 | + if not isinstance(tags, (list,tuple)): |
| 28 | + tags = [ x for x in tags.split() if x ] |
| 29 | + for item in events: |
| 30 | + item['tags'] = tags |
| 31 | + |
def growth(collect=True, shortnames=False, tags=None):
    """Report object-count growth since the previous call (via objgraph).

    The very first call only establishes a baseline and returns None;
    later calls return a splunk-ready dict of growth events, or None
    when nothing grew.

    collect    -- force a gc.collect() before measuring
    shortnames -- passed through to objgraph.growth (unqualified names)
    tags       -- optional tag list/string applied to every event
    """
    log.debug('growth(%s, %s)', collect, shortnames)
    if collect:
        log.debug(' forcing garbage collection')
        gc.collect()

    pid = os.getpid()
    deltas = objgraph.growth(shortnames=shortnames)

    if STATE['growth']:
        # baseline pass: objgraph has now recorded its initial counts
        log.debug(' first query')
        STATE['growth'] = False
        return None

    if not deltas:
        log.debug(' no new growth found')
        return None

    log.debug(' found growth')
    events = []
    for type_name, count, delta in deltas:
        events.append({'time': _now(), 'pid': pid, 'type': type_name,
                       'count': count, 'delta': delta})
    _apply_tags(tags, *events)
    return {'sourcetype': 'memleak.growth', 'events': events}
| 53 | + |
| 54 | +def _obj_name(x): |
| 55 | + try: |
| 56 | + return '{0.__module__}.{0.__name__}'.format(x) |
| 57 | + except AttributeError: |
| 58 | + pass |
| 59 | + try: |
| 60 | + return '{0.__name__}'.format(x) |
| 61 | + except AttributeError: |
| 62 | + pass |
| 63 | + return '{t}#{i:02x}'.format(t=type(x).__name__, i=id(x)) |
| 64 | + |
def new_item_refmaps(collect=True, types=None, skip=2, max_per_type=10, max_total=50, tags=None):
    """Report backref chains for objects created since the previous call.

    Uses objgraph.get_new_ids() to find objects allocated since the last
    invocation, then traces each one back to a proper module so leaks can
    be attributed.  The first ``skip`` invocations only reset the baseline
    (startup allocations are noisy) and return None.

    collect      -- force a gc.collect() before measuring
    types        -- iterable of type names to analyze (default: all new types)
    skip         -- number of initial invocations to ignore
    max_per_type -- cap on events emitted per type
    max_total    -- cap on events emitted overall
    tags         -- optional tag list/string applied to every event

    Returns a splunk-ready dict, or None when skipping / nothing new.
    """
    log.debug('new_item_refmaps(%s)', collect)
    if collect:
        log.debug(' forcing garbage collection')
        gc.collect()

    pid = os.getpid()
    log.debug(' finding new_ids')
    new_ids = objgraph.get_new_ids()  # dict of sets, keyed by type name

    if STATE['new_item_refmaps'] < skip:
        log.debug(' query #%d < %d; not analyzing yet', STATE['new_item_refmaps'], skip)
        STATE['new_item_refmaps'] += 1
        return None

    done = False
    events = []
    if types is None:
        types = new_ids.keys()
    total_count = 0
    # renamed loop variable: 'type' shadowed the builtin
    for type_name in types:
        type_count = 0
        log.debug(' finding backref chains for items of type=%s', type_name)
        for item_id in new_ids.get(type_name, []):
            item = objgraph.at(item_id)
            name = _obj_name(item)
            log.debug(' finding backrefs chain for item=%s', name)
            refmap = objgraph.find_backref_chain(item, objgraph.is_proper_module)
            refmap = [_obj_name(x) for x in refmap]
            events.append({'time': _now(), 'pid': pid, 'type': type_name,
                           'name': name, 'chain': refmap})
            total_count += 1
            type_count += 1
            # BUGFIX: these were strict '>' comparisons, which emitted one
            # event *past* each configured limit (11 for max_per_type=10)
            if type_count >= max_per_type:
                log.debug('reached max_per_type=%d limit', max_per_type)
                done = True
            if total_count >= max_total:
                log.debug('reached max_total=%d limit', max_total)
                done = True
            if done:
                break
        if done:
            break
    if events:
        _apply_tags(tags, *events)
        return {'sourcetype': 'memleak.new_item_refmaps', 'events': events}
| 110 | + |
def _queue():
    """Open the on-disk queue under <cachedir>/memleak-storage (10 MiB cap)."""
    cachedir = __opts__.get('cachedir', '/var/cache/hubble')
    location = os.path.join(cachedir, 'memleak-storage')
    return DiskQueue(location, size=10 * 1024 * 1024)
| 114 | + |
def reveal_withobj_nir():
    """Drain the disk queue populated by with_nir() and return its events.

    Reads until the queue yields a falsy item (empty), so all previously
    stashed new_item_refmaps results come back in one splunk-ready dict.
    """
    queue = _queue()
    events = []
    item = queue.get()
    while item:
        events.append(item)
        item = queue.get()
    return {'sourcetype': 'memleak.new_item_refmaps', 'events': events}
| 124 | + |
def with_nir(name, max_per_type=10, max_total=50, types=None):
    """Build a context manager that records new_item_refmaps around a block.

    On entry the objgraph new-id baseline is reset (so only objects created
    inside the with-block count as "new"); on exit the new objects are
    analyzed and the resulting event dict is queued to disk, tagged
    ``nir:<name>``, for later retrieval by reveal_withobj_nir().

    name         -- tag suffix identifying this measurement site
    max_per_type -- forwarded to new_item_refmaps
    max_total    -- forwarded to new_item_refmaps
    types        -- forwarded to new_item_refmaps (was previously ignored)
    """
    class WithWrap(object):
        def __enter__(self):
            gc.collect()
            objgraph.get_new_ids()  # discard: establishes the baseline
            return self

        # BUGFIX: __exit__ must accept the exception triple; the original
        # one-argument signature made every 'with' block raise TypeError
        # on exit (see the context-manager protocol in the data model docs)
        def __exit__(self, exc_type, exc_value, exc_traceback):
            res = new_item_refmaps(max_per_type=max_per_type,
                max_total=max_total, types=types, tags='nir:{0}'.format(name))
            # new_item_refmaps returns None during its skip phase or when
            # nothing is new; don't queue empty results
            if res:
                _queue().put(res)
            return False  # never suppress exceptions from the block
    return WithWrap()
| 136 | + |
def _build_snapdiff_filters(inc, exc):
    """Turn include/exclude glob pattern(s) into tracemalloc.Filter objects.

    When neither ``inc`` nor ``exc`` is given, return the defaults: trace
    salt and hubblestack code but never this memleak module itself.
    Each argument may be a single glob string or a list/tuple of globs.
    """
    log.debug('building snapdiff filters: inc=%s, exc=%s', inc, exc)
    if not inc and not exc:
        return ( tracemalloc.Filter(True, "*/salt/*"),
                 tracemalloc.Filter(True, "*/hubblestack/*"),
                 tracemalloc.Filter(False, "*/hubblestack/*/memleak*") )
    # NOTE: originally the inc/exc filters were appended to the defaults
    # above.  That returned far more traces than the configured patterns
    # asked for, so the configure-er must now fully describe what they want.
    filters = []
    for inclusive, patterns in ((True, inc), (False, exc)):
        if not patterns:
            continue
        if not isinstance(patterns, (list, tuple)):
            patterns = [patterns]
        filters.extend(tracemalloc.Filter(inclusive, p) for p in patterns)
    return filters
| 156 | + |
def _do_snapdiff_tracemalloc(filter):
    """Take a tracemalloc snapshot restricted by *filter*, with a timestamp.

    Returns {'snap': filtered Snapshot, 't': integer unix time}.
    """
    snapshot = tracemalloc.take_snapshot()
    filtered = snapshot.filter_traces(filter)
    return {'snap': filtered, 't': _now()}
| 160 | + |
| 161 | +# eg: /usr/local/python/hubble.python/lib/python2.7/site-packages/salt_ssh-2019.2.0-py2.7.egg/salt/log/handlers/__init__.py |
| 162 | +_sdp_r = re.compile(r'(?:(?:site|dist)-packages|hubble-libs)/(.+)/(.+\.py)$') |
def snapdiff_tracemalloc(max_traces=None, sort_by=lambda x: -x['size'], group_by='filename',
    compare_to='first', include_fglob=None, exclude_fglob=None):
    ''' track memory allocations (requires tracemalloc, which requires python
        patches in python2.7, but not in python3).

        The first invocation starts tracing, records a baseline snapshot and
        returns None; each later invocation returns a splunk-ready dict of
        allocation-diff events.

        params:
            max_traces :- if specified, limits the number of events to this number
            sort_by :- key-function for sorting among: 'size', 'size_diff', 'count', 'count_diff'
                after filtering and reducing the returns, sort them this way
                default: lambda x: -x['size']
                set to None to disable
                (probably only matters when max_traces are set)
            group_by :- must be one of 'filename' or 'lineno'
                when comparing and generating stats, group size/counts by this key
            compare_to :- must be one of 'first' or 'last'
                compare to the start of time ('first') or the last invocation ('last')
                'last' compares are something like d(mallocs)/dx
        optional filter params:
            If either of these is specified, no other default filters will be applied.
            include_fglob :- default: '*'
            exclude_fglob :- default: 'hubblestack/*/memleak*'
    '''
    # snapshot state is stashed on the tracemalloc module itself —
    # presumably so it survives salt-loader re-imports of this module;
    # TODO(review): confirm that is the intent
    if not hasattr(tracemalloc, '_d'):
        tracemalloc._d = dict()
    _d = tracemalloc._d
    # NOTE(review): 'filter' shadows the builtin of the same name
    filter = _build_snapdiff_filters(include_fglob, exclude_fglob)
    if _d.get('first') is None:
        # baseline invocation: start tracing and record the first snapshot
        tracemalloc.start(1) # default is 1 frame in the stack trace
        _d['first'] = _d['last'] = _do_snapdiff_tracemalloc(filter)
        return
    cur = _do_snapdiff_tracemalloc(filter)
    stats = cur['snap'].compare_to(_d[compare_to]['snap'], group_by, cumulative=True)
    # update 'last' so compare_to='last' diffs against this invocation next time
    _d['last'] = cur
    def fmt_s(s):
        # flatten one StatisticDiff into an event dict; fname/lineno keep
        # their '???.??'/'?' placeholders when the path doesn't match _sdp_r
        ret = {'time': cur['t'], 'fst': _d['first']['t'], 'count': s.count, 'count_diff': s.count_diff,
            'size': s.size, 'size_diff': s.size_diff, 'fname': '???.??', 'lineno': '?'}
        for tb in s.traceback:
            # NOTE: there's normally only one tb in the traceback, see
            # tracemalloc.start(nframes=1); this is more of a way to avoid
            # exceptions and shorten the filename
            m = _sdp_r.search(tb.filename)
            if m:
                ret['fname'] = m.group(1) + '/' + m.group(2)
                ret['lineno'] = tb.lineno
        return ret
    l0 = len(stats)
    log.info('fst=%d found %d stats', _d['first']['t'], l0)
    stats = [ fmt_s(s) for s in stats ]
    # NOTE(review): fmt_s always returns a (truthy) dict, so this filter
    # currently removes nothing and l1 always equals l0
    stats = [ s for s in stats if s ]
    if callable(sort_by):
        stats = sorted(stats, key=sort_by)
    l1 = len(stats)
    if l1 != l0:
        log.info('fst=%s reduced to %d stats by filters', _d['first']['t'], l1)
    if stats:
        if max_traces and max_traces > 0:
            stats = stats[:max_traces]
        l2 = len(stats)
        if l2 not in (l0, l1):
            log.info('fst=%s reduced to %d stats by max_traces=%d', _d['first']['t'], l2, max_traces)
        return { 'sourcetype': 'memleak.snapdiff_tracemalloc', 'events': stats }
0 commit comments