|
| 1 | +#!/usr/bin/python2 |
| 2 | +# |
| 3 | +# Crunch the CSV output from: |
| 4 | +# |
| 5 | +# SELECT eventtime, |
| 6 | +# eventname, |
| 7 | +# useridentity.username, |
| 8 | +# useragent, |
| 9 | +# errorcode, |
| 10 | +# errormessage |
| 11 | +# FROM "default"."cloudtrail_logs_cloud_trail_test_clayton" |
| 12 | +# WHERE from_iso8601_timestamp(eventtime) > date_add('hour', -48, now()) |
| 13 | +# AND eventname IN ('CreateVpc', 'DeleteVpc') |
| 14 | +# ORDER BY eventtime; |
| 15 | +# |
| 16 | +# with: |
| 17 | +# |
| 18 | +# $ ./vpc.py <vpc.csv |
| 19 | + |
| 20 | +import collections |
| 21 | +import csv |
| 22 | +import datetime |
| 23 | +import sys |
| 24 | + |
| 25 | +import matplotlib.dates |
| 26 | +import matplotlib.pyplot |
| 27 | + |
| 28 | + |
# Suffix appended to an event name when the call carried an error code.
error_suffix = ' (error)'
# Event whose successful calls are used as the "per VPC" denominator.
create_event = 'CreateVpc'
# Width of one histogram bin, in minutes.
bin_minutes = 120


def load_requests(rows):
    """Group event timestamps by event label.

    `rows` is an iterable of mappings with 'eventtime', 'eventname' and
    'errorcode' keys (e.g. a csv.DictReader over the query output).  Calls
    with a non-empty 'errorcode' are labelled with `error_suffix` appended.

    Returns `(requests, begin, end)`: `requests` maps each label to its list
    of datetimes; `begin` and `end` are the earliest and latest timestamps
    seen, or None when `rows` is empty.
    """
    requests = collections.defaultdict(list)
    begin = end = None
    for row in rows:
        timestamp = datetime.datetime.strptime(
            row['eventtime'], '%Y-%m-%dT%H:%M:%SZ')
        # To restrict the analysis to a sub-window, skip rows here based on
        # `timestamp`.
        # min/max rather than first/last so unsorted input cannot produce a
        # negative time span.
        begin = timestamp if begin is None else min(begin, timestamp)
        end = timestamp if end is None else max(end, timestamp)
        event = row['eventname']
        if row['errorcode']:
            event += error_suffix
        requests[event].append(timestamp)
    return requests, begin, end


def order_series(requests):
    """Return `(names, series)` in plotting order.

    Failed-call labels come first and successful ones after, each group
    sorted by name; `series[i]` is the timestamp list for `names[i]`.
    """
    names = sorted(
        requests,
        key=lambda name: (not name.endswith(error_suffix), name))
    return names, [requests[name] for name in names]


def bin_count(begin, end, minutes):
    """Return how many `minutes`-wide bins span begin..end (at least one).

    The floor of one keeps hist() usable when all events fall within less
    than a single bin width.
    """
    span_seconds = int((end - begin).total_seconds())
    return max(1, span_seconds // (60 * minutes))


def unstack_counts(tops):
    """Turn cumulative stacked-bar tops into per-series bin counts.

    For a stacked histogram, hist() returns for each series the height of
    the *top* of its bars, i.e. the running total of that series and every
    series below it.  Subtracting the previous row recovers the real counts.
    """
    counts = []
    below = None
    for top in tops:
        top = list(top)
        if below is None:
            counts.append(top)
        else:
            counts.append(
                [upper - lower for upper, lower in zip(top, below)])
        below = top
    return counts


def ratios_per_vpc(names, counts):
    """Return `(labels, ratios)` for every series except `create_event`.

    Each bin count is divided by the number of successful `create_event`
    calls in the same bin; bins without any are divided by 1 instead.
    `labels[i]` names `ratios[i]`, so the two can be zipped for plotting.

    Raises ValueError if `create_event` is not in `names`.
    """
    vpc_counts = counts[names.index(create_event)]
    labels = []
    ratios = []
    for name, series in zip(names, counts):
        if name == create_event:
            continue
        labels.append(name)
        ratios.append([
            float(count) / (float(vpc_count) or 1)
            for count, vpc_count in zip(series, vpc_counts)
        ])
    return labels, ratios


def summary_lines(names, counts):
    """Return one 'total<TAB>total per VPC<TAB>name' line per series.

    A zero `create_event` total is replaced by 1 to avoid dividing by zero.
    """
    vpc_total = float(sum(counts[names.index(create_event)])) or 1
    return [
        '{}\t{:.2f}\t{}'.format(sum(series), sum(series) / vpc_total, name)
        for name, series in zip(names, counts)
    ]


def style_time_axis(axes):
    """Give `axes` date ticks on x, an upper-left legend and tight limits."""
    # A locator instance must not be shared between axes, so build one each.
    locator = matplotlib.dates.AutoDateLocator()
    axes.legend(loc='upper left', frameon=False)
    axes.xaxis.set_major_locator(locator)
    axes.xaxis.set_major_formatter(
        matplotlib.dates.AutoDateFormatter(locator))
    axes.axis('tight')


def main():
    """Read the CSV from stdin, write calls-per-vpc.png, print totals."""
    requests, begin, end = load_requests(csv.DictReader(sys.stdin))
    names, series = order_series(requests)
    if create_event not in names:
        # Also covers empty input.  Everything below is normalised by the
        # successful CreateVpc calls, so there is nothing to compute.
        raise ValueError(
            'no successful {} events in input; saw {}'.format(
                create_event, names))

    data = [matplotlib.dates.date2num(times) for times in series]
    period = '{} through {} UTC'.format(
        begin.isoformat(' '), end.isoformat(' '))
    figure = matplotlib.pyplot.figure()
    figure.set_size_inches(20, 10)

    axes = figure.add_subplot(2, 1, 1)
    tops, edges, _ = axes.hist(
        data, bin_count(begin, end, bin_minutes), histtype='barstacked',
        rwidth=1, edgecolor='none', label=names)
    axes.set_title('Calls')
    axes.set_xlabel(period)
    axes.set_ylabel('stacked count (per {}-minute bin)'.format(bin_minutes))
    style_time_axis(axes)

    if len(names) == 1:
        # With a single dataset hist() returns a bare 1-D array instead of
        # a list of arrays.
        tops = [tops]
    counts = unstack_counts(tops)
    labels, ratios = ratios_per_vpc(names, counts)
    centers = [(low + high) / 2 for low, high in zip(edges, edges[1:])]

    axes = figure.add_subplot(2, 1, 2)
    for label, ratio in zip(labels, ratios):
        axes.plot(centers, ratio, label=label)
    axes.set_xlabel(period)
    axes.set_ylabel(
        'count (per {}-minute bin, per VPC)'.format(bin_minutes))
    style_time_axis(axes)
    axes.set_xlim(edges[0], edges[-1])

    figure.savefig('calls-per-vpc.png')

    for line in summary_lines(names, counts):
        print(line)


if __name__ == '__main__':
    main()
0 commit comments