Skip to content

Commit 35a5749

Browse files
authored
Add scripts for collecting code prefixes (#1)
1 parent 734d45e commit 35a5749

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

code_prefixes/snap2code-prefixes.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
3+
# Filters geth snapshot dump and collects 4-byte code prefixes of all accounts.
4+
#
5+
# geth snapshot dump --nostorage | snap2code-prefixes.py > code-prefixes.txt
6+
7+
import collections
8+
import fileinput
9+
import json
10+
11+
def count_code_prefixes(lines):
    """Count code prefixes over an iterable of JSON-encoded account lines.

    Each non-blank line is parsed as one JSON object. Objects without a
    'code' entry (or with a null one) are skipped. For the rest, characters
    2..9 of the code string are used as the prefix, i.e. the first 8
    characters after a 2-character lead-in (presumably the "0x" marker of a
    hex string, giving a 4-byte prefix -- confirm against the dump format).

    Args:
        lines: iterable of strings, one JSON document per line.

    Returns:
        collections.Counter mapping prefix string -> number of occurrences.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    prefixes = collections.Counter()
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline at end of input)
        # instead of crashing inside json.loads.
        if not line.strip():
            continue
        account = json.loads(line)
        # A plain lookup-with-default replaces the old try/except, whose
        # handler ignored every KeyError regardless of which key was missing.
        code = account.get('code')
        if code is None:
            continue
        prefixes[code[2:10]] += 1
    return prefixes


def main():
    """Read account lines from stdin / the files named in argv and print counts."""
    for prefix, count in count_code_prefixes(fileinput.input()).items():
        print(f"{prefix:8}: {count}")


if __name__ == "__main__":
    main()

code_prefixes/warp2code-prefixes.py

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/usr/bin/env python
2+
3+
# Processes OpenEthereum warp snapshot and collects 4-byte code prefixes of all accounts.
4+
#
5+
# openethereum --chain=kovan snapshot --snapshot-threads=8 snapshot.warp
6+
# warp2code-prefixes.py snapshot.warp
7+
8+
import sys
9+
import rlp
10+
import snappy
11+
import collections
12+
13+
def read_manifest_bytes(f):
    """Return the raw (still RLP-encoded) manifest of an open snapshot file.

    The final 8 bytes of the file are read as a little-endian integer giving
    the offset where the manifest starts; the manifest spans from that offset
    up to (not including) those final 8 bytes. Diagnostic values are printed
    to stdout along the way.

    Args:
        f: seekable binary file object positioned anywhere.

    Returns:
        bytes of the manifest.

    Raises:
        ValueError: if the stored offset points past the 8-byte trailer.
    """
    f.seek(0, 2)
    size = f.tell()
    print(f"{size=}")

    f.seek(-8, 2)
    manifest_end = f.tell()
    manifest_off_bytes = f.read(8)
    print(f"{manifest_off_bytes=}")

    manifest_off = int.from_bytes(manifest_off_bytes, 'little')
    print(f"{manifest_off=}")

    # Fail with a clear message rather than a confusing negative read length.
    if manifest_off > manifest_end:
        raise ValueError(
            f"manifest offset {manifest_off} lies beyond trailer at {manifest_end}"
        )

    f.seek(manifest_off, 0)
    return f.read(manifest_end - manifest_off)


def add_chunk_code_prefixes(chunk, prefix_map):
    """Add the code prefixes found in one decoded state chunk to prefix_map.

    Each chunk entry is indexed as entry[1] -> account, with account[2] being
    a flag and account[3] the code. Only accounts whose flag equals b'\\x01'
    are counted; the first (up to) 4 bytes of their code form the key.

    NOTE(review): accounts with any other flag value are skipped. If the
    snapshot format can reference already-seen code by hash with a different
    flag, those accounts are not reflected in the counts -- confirm.

    Args:
        chunk: sequence of entries as produced by decoding a state chunk.
        prefix_map: mutable mapping bytes -> int supporting `+= 1` on
            missing keys (e.g. collections.defaultdict(int)); updated in place.
    """
    for entry in chunk:
        acc = entry[1]
        if acc[2] == b'\x01':
            prefix_map[bytes(acc[3][:4])] += 1


def main(argv=None):
    """Process the snapshot file named by argv[1] and print prefix counts.

    Args:
        argv: argument vector; defaults to sys.argv.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Previously this surfaced as a bare IndexError traceback.
        sys.exit("usage: warp2code-prefixes.py snapshot.warp")

    prefix_map = collections.defaultdict(int)

    filename = argv[1]
    print(f"{filename=}")

    with open(filename, 'rb') as f:
        manifest = rlp.decode(read_manifest_bytes(f))
        manifest_ver = int.from_bytes(manifest[0], 'big')
        block_number = int.from_bytes(manifest[4], 'big')
        block_hash = manifest[5]
        print(f"{manifest_ver=}")
        print(f"{block_number=}")
        print(f"block_hash={block_hash.hex()}")

        state_chunks = manifest[1]
        num_chunks = len(state_chunks)
        print(f"{num_chunks=}")

        for i, info in enumerate(state_chunks):
            # info[1] / info[2] are used as the compressed length and the
            # file position of the chunk respectively.
            chunk_len = int.from_bytes(info[1], 'big')
            chunk_pos = int.from_bytes(info[2], 'big')
            print(f"{i}/{num_chunks}: {chunk_pos=} {chunk_len=}", end='')

            f.seek(chunk_pos)
            chunk_compressed = f.read(chunk_len)
            chunk_bytes = snappy.uncompress(chunk_compressed)

            chunk = rlp.decode(chunk_bytes)
            print(f" uncompressed_len={len(chunk_bytes)} num_accounts={len(chunk)}", flush=True)
            add_chunk_code_prefixes(chunk, prefix_map)

    for k, v in prefix_map.items():
        print(f"{k.hex()} : {v}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)