|
#!/usr/bin/env python

# Processes OpenEthereum warp snapshot and collects 4-byte code prefixes of all accounts.
#
# Usage:
#   openethereum --chain=kovan snapshot --snapshot-threads=8 snapshot.warp
#   warp2code-prefixes.py snapshot.warp
#
# Output: a few diagnostic lines about the file and its manifest, one progress
# line per state chunk, and finally one "<hex prefix> : <count>" line for every
# distinct code prefix that was seen.

import sys
import rlp
import snappy
import collections

# The last 8 bytes of the file hold the little-endian offset of the manifest.
MANIFEST_OFFSET_SIZE = 8

# Value of the account's third field that this script treats as "code follows".
# NOTE(review): entries carrying any other marker value are skipped; confirm
# against the snapshot format that this is the intended set of accounts.
INLINE_CODE_MARKER = b'\x01'

# Number of leading code bytes used as the histogram key.
PREFIX_LEN = 4

# Maps code prefix (bytes) -> number of accounts whose code starts with it.
prefix_map = collections.Counter()

# Fail with a usage line instead of an IndexError traceback when no path is given.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <snapshot.warp>")

filename = sys.argv[1]
print(f"{filename=}")

with open(filename, 'rb') as f:
    f.seek(0, 2)
    size = f.tell()
    print(f"{size=}")

    # Seeking to -8 from the end raises OSError on a shorter file; report it clearly.
    if size < MANIFEST_OFFSET_SIZE:
        sys.exit(f"{filename}: too small ({size} bytes) to contain a manifest offset")

    f.seek(-MANIFEST_OFFSET_SIZE, 2)
    manifest_end = f.tell()
    manifest_off_bytes = f.read(MANIFEST_OFFSET_SIZE)
    print(f"{manifest_off_bytes=}")

    manifest_off = int.from_bytes(manifest_off_bytes, 'little')
    print(f"{manifest_off=}")

    # A negative read length would silently read to EOF, so reject bad offsets.
    if manifest_off > manifest_end:
        sys.exit(
            f"{filename}: manifest offset {manifest_off} is past "
            f"the end of the data ({manifest_end})"
        )

    # The manifest occupies everything between its offset and the 8-byte trailer.
    f.seek(manifest_off, 0)
    manifest_bytes = f.read(manifest_end - manifest_off)

    # Manifest fields used here: [0] version, [1] state chunk list,
    # [4] block number, [5] block hash.
    manifest = rlp.decode(manifest_bytes)
    manifest_ver = int.from_bytes(manifest[0], 'big')
    block_number = int.from_bytes(manifest[4], 'big')
    block_hash = manifest[5]
    print(f"{manifest_ver=}")
    print(f"{block_number=}")
    print(f"block_hash={block_hash.hex()}")

    state_chunks = manifest[1]
    num_chunks = len(state_chunks)
    print(f"{num_chunks=}")

    for i, info in enumerate(state_chunks):
        # Chunk descriptor fields used here: [1] compressed length, [2] file offset.
        chunk_len = int.from_bytes(info[1], 'big')
        chunk_pos = int.from_bytes(info[2], 'big')
        # No newline yet: the decoded sizes are appended to this line below.
        print(f"{i}/{num_chunks}: {chunk_pos=} {chunk_len=}", end='')

        f.seek(chunk_pos)
        chunk_compressed = f.read(chunk_len)
        chunk_bytes = snappy.uncompress(chunk_compressed)

        chunk = rlp.decode(chunk_bytes)
        print(f" uncompressed_len={len(chunk_bytes)} num_accounts={len(chunk)}", flush=True)

        for entry in chunk:
            # entry[1] is the account record; acc[2] is the code marker, acc[3] the code.
            acc = entry[1]
            if acc[2] == INLINE_CODE_MARKER:
                code_prefix = bytes(acc[3][:PREFIX_LEN])
                prefix_map[code_prefix] += 1

# Report in first-seen order, one prefix per line.
for k, v in prefix_map.items():
    print(f"{k.hex()} : {v}")
0 commit comments