Skip to content

Initial implementation of era1 + export + verify #1998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
392 changes: 392 additions & 0 deletions fluffy/eth_data/era1.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,392 @@
# fluffy
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

import
std/[strformat, typetraits],
results, stew/[endians2, io2, byteutils, arrayops],
stint, snappy,
eth/common/eth_types_rlp,
beacon_chain/spec/beacon_time,
ssz_serialization,
ncli/e2store,
../network/history/accumulator

from nimcrypto/hash import fromHex
from ../../nimbus/utils/utils import calcTxRoot, calcReceiptRoot

export e2store.readRecord

# Implementation of era1 file format as current described in:
# https://github.com/ethereum/go-ethereum/pull/26621

# era1 := Version | block-tuple* | other-entries* | Accumulator | BlockIndex
# block-tuple := CompressedHeader | CompressedBody | CompressedReceipts | TotalDifficulty

# block-index := starting-number | index | index | index ... | count

# CompressedHeader = { type: 0x03, data: snappyFramed(rlp(header)) }
# CompressedBody = { type: 0x04, data: snappyFramed(rlp(body)) }
# CompressedReceipts = { type: 0x05, data: snappyFramed(rlp(receipts)) }
# TotalDifficulty = { type: 0x06, data: uint256(header.total_difficulty) }
# AccumulatorRoot = { type: 0x07, data: hash_tree_root(List(HeaderRecord, 8192)) }
# BlockIndex = { type: 0x3266, data: block-index }

# Important note:
# Snappy does not give the same compression result as the implementation used
# by go-ethereum for some block headers and block bodies. This means that we
# cannot rely on the secondary verification mechanism that is based on doing the
# sha256sum of the full era1 files.
#

const
# Note: When specification is more official, these could go with the other
# E2S types.
CompressedHeader* = [byte 0x03, 0x00]
CompressedBody* = [byte 0x04, 0x00]
CompressedReceipts* = [byte 0x05, 0x00]
TotalDifficulty* = [byte 0x06, 0x00]
AccumulatorRoot* = [byte 0x07, 0x00]
E2BlockIndex* = [byte 0x66, 0x32]

MaxEra1Size* = 8192

type
BlockIndex* = object
startNumber*: uint64
offsets*: seq[int64] # Absolute positions in file

Era1* = distinct uint64 # Period of 8192 blocks (not an exact time unit)

Era1Group* = object
blockIndex*: BlockIndex

# As stated, not really a time unit but nevertheless, need the borrows
ethTimeUnit Era1

template lenu64(x: untyped): untyped =
uint64(len(x))

# Note: appendIndex, appendRecord and readIndex for BlockIndex are very similar
# to its consensus layer counter parts. The difference lies in the naming of
# slots vs block numbers and there is different behavior for the first era
# (first slot) and the last era (era1 ends at merge block).

proc appendIndex*(
f: IoHandle, startNumber: uint64, offsets: openArray[int64]):
Result[int64, string] =
let
len = offsets.len() * sizeof(int64) + 16
pos = ? f.appendHeader(E2BlockIndex, len)

? f.append(startNumber.uint64.toBytesLE())

for v in offsets:
? f.append(cast[uint64](v - pos).toBytesLE())

? f.append(offsets.lenu64().toBytesLE())

ok(pos)

proc appendRecord(f: IoHandle, index: BlockIndex): Result[int64, string] =
f.appendIndex(index.startNumber, index.offsets)

proc readBlockIndex*(f: IoHandle): Result[BlockIndex, string] =
let
startPos = ? f.getFilePos().mapErr(toString)
fileSize = ? f.getFileSize().mapErr(toString)
header = ? f.readHeader()

if header.typ != E2BlockIndex: return err("not an index")
if header.len < 16: return err("index entry too small")
if header.len mod 8 != 0: return err("index length invalid")

var buf: array[8, byte]
? f.readFileExact(buf)
let
blockNumber = uint64.fromBytesLE(buf)
count = header.len div 8 - 2

var offsets = newSeqUninitialized[int64](count)
for i in 0..<count:
? f.readFileExact(buf)

let
offset = uint64.fromBytesLE(buf)
absolute =
if offset == 0: 0'i64
else:
# Wrapping math is actually convenient here
cast[int64](cast[uint64](startPos) + offset)

if absolute < 0 or absolute > fileSize: return err("Invalid offset")
offsets[i] = absolute

? f.readFileExact(buf)
if uint64(count) != uint64.fromBytesLE(buf): return err("invalid count")

# technically not an error, but we'll throw this sanity check in here..
if blockNumber > int32.high().uint64: return err("fishy block number")

ok(BlockIndex(startNumber: blockNumber, offsets: offsets))

func startNumber*(era: Era1): uint64 =
era * MaxEra1Size

func endNumber*(era: Era1): uint64 =
if (era + 1) * MaxEra1Size - 1'u64 >= mergeBlockNumber:
# The incomplete era just before the merge
mergeBlockNumber - 1'u64
else:
(era + 1) * MaxEra1Size - 1'u64

func endNumber*(blockIdx: BlockIndex): uint64 =
blockIdx.startNumber + blockIdx.offsets.lenu64() - 1

func era*(blockNumber: uint64): Era1 =
Era1(blockNumber div MaxEra1Size)

func offsetsLen(startNumber: uint64): int =
# For the era where the merge happens the era files only holds the blocks
# until the merge block so the offsets length needs to be adapted too.
if startNumber.era() >= mergeBlockNumber.era():
int((mergeBlockNumber) mod MaxEra1Size)
else:
MaxEra1Size

proc toCompressedRlpBytes(item: auto): seq[byte] =
snappy.encodeFramed(rlp.encode(item))

proc fromCompressedRlpBytes(bytes: openArray[byte], T: type): Result[T, string] =
try:
ok(rlp.decode(decodeFramed(bytes, checkIntegrity = false), T))
except RlpError as e:
err("Invalid Compressed RLP data" & e.msg)

proc init*(
T: type Era1Group, f: IoHandle, startNumber: uint64
): Result[T, string] =
discard ? f.appendHeader(E2Version, 0)

ok(Era1Group(
blockIndex: BlockIndex(
startNumber: startNumber,
offsets: newSeq[int64](startNumber.offsetsLen())
)))

proc update*(
g: var Era1Group, f: IoHandle, blockNumber: uint64,
header, body, receipts, totalDifficulty: openArray[byte]
): Result[void, string] =
doAssert blockNumber >= g.blockIndex.startNumber

g.blockIndex.offsets[int(blockNumber - g.blockIndex.startNumber)] =
? f.appendRecord(CompressedHeader, header)
discard ? f.appendRecord(CompressedBody, body)
discard ? f.appendRecord(CompressedReceipts, receipts)
discard ? f.appendRecord(TotalDifficulty, totalDifficulty)

ok()

proc update*(
g: var Era1Group, f: IoHandle, blockNumber: uint64,
header: BlockHeader, body: BlockBody, receipts: seq[Receipt],
totalDifficulty: UInt256
): Result[void, string] =
g.update(
f, blockNumber,
toCompressedRlpBytes(header),
toCompressedRlpBytes(body),
toCompressedRlpBytes(receipts),
totalDifficulty.toBytesLE()
)

proc finish*(
g: var Era1Group, f: IoHandle, accumulatorRoot: Digest, lastBlockNumber: uint64
):Result[void, string] =
let accumulatorRootPos = ? f.appendRecord(AccumulatorRoot, accumulatorRoot.data)

if lastBlockNumber > 0:
discard ? f.appendRecord(g.blockIndex)

# TODO:
# This is not something added in current specification of era1.
# But perhaps we want to be able to quickly jump to acummulator root.
# discard ? f.appendIndex(lastBlockNumber, [accumulatorRootPos])
discard accumulatorRootPos

ok()

func shortLog*(x: Digest): string =
x.data.toOpenArray(0, 3).toHex()

func era1FileName*(network: string, era: Era1, eraRoot: Digest): string =
try:
&"{network}-{era.uint64:05}-{shortLog(eraRoot)}.era1"
except ValueError as exc:
raiseAssert exc.msg

# Helpers to directly read objects from era1 files
# TODO: Might want to var parameters to avoid copying as is done for era files.

type
Era1File* = ref object
handle: Opt[IoHandle]
blockIdx: BlockIndex

proc open*(_: type Era1File, name: string): Result[Era1File, string] =
var
f = Opt[IoHandle].ok(? openFile(name, {OpenFlags.Read}).mapErr(ioErrorMsg))

defer:
if f.isSome(): discard closeFile(f[])

# Indices can be found at the end of each era file - we only support
# single-era files for now
? f[].setFilePos(0, SeekPosition.SeekEnd).mapErr(ioErrorMsg)

# Last in the file is the block index
let
blockIdxPos = ? f[].findIndexStartOffset()
? f[].setFilePos(blockIdxPos, SeekPosition.SeekCurrent).mapErr(ioErrorMsg)

let blockIdx = ? f[].readBlockIndex()
if blockIdx.offsets.len() != blockIdx.startNumber.offsetsLen():
return err("Block index length invalid")

let res = Era1File(handle: f, blockIdx: blockIdx)
reset(f)
ok res

proc close*(f: Era1File) =
if f.handle.isSome():
discard closeFile(f.handle.get())
reset(f.handle)

proc getBlockHeader(f: Era1File): Result[BlockHeader, string] =
var bytes: seq[byte]

let header = ? f[].handle.get().readRecord(bytes)
if header.typ != CompressedHeader:
return err("Invalid era file: didn't find block header at index position")

fromCompressedRlpBytes(bytes, BlockHeader)

proc getBlockBody(f: Era1File): Result[BlockBody, string] =
var bytes: seq[byte]

let header = ? f[].handle.get().readRecord(bytes)
if header.typ != CompressedBody:
return err("Invalid era file: didn't find block body at index position")

fromCompressedRlpBytes(bytes, BlockBody)

proc getReceipts(f: Era1File): Result[seq[Receipt], string] =
var bytes: seq[byte]

let header = ? f[].handle.get().readRecord(bytes)
if header.typ != CompressedReceipts:
return err("Invalid era file: didn't find receipts at index position")

fromCompressedRlpBytes(bytes, seq[Receipt])

proc getTotalDifficulty(f: Era1File): Result[UInt256, string] =
var bytes: seq[byte]

let header = ? f[].handle.get().readRecord(bytes)
if header.typ != TotalDifficulty:
return err("Invalid era file: didn't find total difficulty at index position")

if bytes.len != 32:
return err("Invalid total difficulty length")

ok(UInt256.fromBytesLE(bytes))

proc getNextBlockTuple*(
f: Era1File
): Result[(BlockHeader, BlockBody, seq[Receipt], UInt256), string] =
doAssert not isNil(f) and f[].handle.isSome

let
blockHeader = ? getBlockHeader(f)
blockBody = ? getBlockBody(f)
receipts = ? getReceipts(f)
totalDifficulty = ? getTotalDifficulty(f)

ok((blockHeader, blockBody, receipts, totalDifficulty))

proc getBlockTuple*(
f: Era1File, blockNumber: uint64
): Result[(BlockHeader, BlockBody, seq[Receipt], UInt256), string] =
doAssert not isNil(f) and f[].handle.isSome
doAssert(
blockNumber >= f[].blockIdx.startNumber and
blockNumber <= f[].blockIdx.endNumber,
"Wrong era1 file for selected block number")

let pos = f[].blockIdx.offsets[blockNumber - f[].blockIdx.startNumber]

? f[].handle.get().setFilePos(pos, SeekPosition.SeekBegin).mapErr(ioErrorMsg)

getNextBlockTuple(f)

# TODO: Should we add this perhaps in the Era1File object and grab it in open()?
proc getAccumulatorRoot*(f: Era1File): Result[Digest, string] =
# Get position of BlockIndex
? f[].handle.get().setFilePos(0, SeekPosition.SeekEnd).mapErr(ioErrorMsg)
let blockIdxPos = ? f[].handle.get().findIndexStartOffset()

# Accumulator root is 40 bytes before the BlockIndex
let accumulatorRootPos = blockIdxPos - 40 # 8 + 32
? f[].handle.get().setFilePos(accumulatorRootPos, SeekPosition.SeekCurrent).mapErr(ioErrorMsg)

var bytes: seq[byte]
let header = ? f[].handle.get().readRecord(bytes)

if header.typ != AccumulatorRoot:
return err("Invalid era file: didn't find accumulator root at index position")

if bytes.len != 32:
return err("invalid accumulator root")

ok(Digest(data: array[32, byte].initCopyFrom(bytes)))

proc verify*(f: Era1File): Result[Digest, string] =
let
startNumber = f.blockIdx.startNumber
endNumber = f.blockIdx.endNumber()

var headerRecords: seq[HeaderRecord]
for blockNumber in startNumber..endNumber:
let
(blockHeader, blockBody, receipts, totalDifficulty) =
? f.getBlockTuple(blockNumber)

txRoot = calcTxRoot(blockBody.transactions)
ommershHash = keccakHash(rlp.encode(blockBody.uncles))

if blockHeader.txRoot != txRoot:
return err("Invalid transactions root")

if blockHeader.ommersHash != ommershHash:
return err("Invalid ommers hash")

if blockHeader.receiptRoot != calcReceiptRoot(receipts):
return err("Invalid receipts root")

headerRecords.add(HeaderRecord(
blockHash: blockHeader.blockHash(),
totalDifficulty: totalDifficulty))

let expectedRoot = ? f.getAccumulatorRoot()
let accumulatorRoot = getEpochAccumulatorRoot(headerRecords)

if accumulatorRoot != expectedRoot:
err("Invalid accumulator root")
else:
ok(accumulatorRoot)
Loading