Explorar el Código

core/state, trie: add node iterator, test state/trie sync consistency

Péter Szilágyi hace 10 años
padre
commit
7e29b0b5b4
Se han modificado 4 ficheros con 451 adiciones y 11 borrados
  1. 133 0
      core/state/iterator.go
  2. 104 3
      core/state/sync_test.go
  3. 117 3
      trie/iterator.go
  4. 97 5
      trie/sync_test.go

+ 133 - 0
core/state/iterator.go

@@ -0,0 +1,133 @@
+// Copyright 2015 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package state
+
+import (
+	"bytes"
+	"fmt"
+	"math/big"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie"
+)
+
+// NodeIterator is an iterator to traverse the entire state trie post-order,
+// including all of the contract code and contract state tries.
+type NodeIterator struct {
+	state *StateDB // State being iterated
+
+	stateIt *trie.NodeIterator // Primary iterator for the global state trie
+	dataIt  *trie.NodeIterator // Secondary iterator for the data trie of a contract
+	code    []byte             // Source code associated with a contract
+
+	Entry interface{} // Current state entry being iterated (internal representation)
+}
+
+// NewNodeIterator creates an post-order state node iterator.
+func NewNodeIterator(state *StateDB) *NodeIterator {
+	return &NodeIterator{
+		state: state,
+	}
+}
+
+// Next moves the iterator to the next node, returning whether there are any
+// further nodes.
+func (it *NodeIterator) Next() bool {
+	it.step()
+	return it.retrieve()
+}
+
+// step moves the iterator to the next entry of the state trie.
+func (it *NodeIterator) step() {
+	// Abort if we reached the end of the iteration
+	if it.state == nil {
+		return
+	}
+	// Initialize the iterator if we've just started
+	if it.stateIt == nil {
+		it.stateIt = trie.NewNodeIterator(it.state.trie.Trie)
+	}
+	// If we had data nodes previously, we surely have at least state nodes
+	if it.dataIt != nil {
+		if cont := it.dataIt.Next(); !cont {
+			it.dataIt = nil
+		}
+		return
+	}
+	// If we had source code previously, discard that
+	if it.code != nil {
+		it.code = nil
+		return
+	}
+	// Step to the next state trie node, terminating if we're out of nodes
+	if cont := it.stateIt.Next(); !cont {
+		it.state, it.stateIt = nil, nil
+		return
+	}
+	// If the state trie node is an internal entry, leave as is
+	if !it.stateIt.Leaf {
+		return
+	}
+	// Otherwise we've reached an account node, initiate data iteration
+	var account struct {
+		Nonce    uint64
+		Balance  *big.Int
+		Root     common.Hash
+		CodeHash []byte
+	}
+	err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob), &account)
+	if err != nil {
+		panic(err)
+	}
+	dataTrie, err := trie.New(account.Root, it.state.db)
+	if err != nil {
+		panic(err)
+	}
+	it.dataIt = trie.NewNodeIterator(dataTrie)
+	if !it.dataIt.Next() {
+		it.dataIt = nil
+	}
+	if bytes.Compare(account.CodeHash, emptyCodeHash) != 0 {
+		it.code, err = it.state.db.Get(account.CodeHash)
+		if err != nil {
+			panic(fmt.Sprintf("code %x: %v", account.CodeHash, err))
+		}
+	}
+}
+
+// retrieve pulls and caches the current state entry the iterator is traversing.
+// The method returns whether there are any more data left for inspection.
+func (it *NodeIterator) retrieve() bool {
+	// Clear out any previously set values
+	it.Entry = nil
+
+	// If the iteration's done, return no available data
+	if it.state == nil {
+		return false
+	}
+	// Otherwise retrieve the current entry
+	switch {
+	case it.dataIt != nil:
+		it.Entry = it.dataIt.Node
+	case it.code != nil:
+		it.Entry = it.code
+	case it.stateIt != nil:
+		it.Entry = it.stateIt.Node
+	}
+	return true
+}

+ 104 - 3
core/state/sync_test.go

@@ -18,10 +18,12 @@ package state
 
 import (
 	"bytes"
+	"fmt"
 	"math/big"
 	"testing"
 
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/trie"
 )
@@ -42,7 +44,7 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) {
 
 	// Fill it with some arbitrary data
 	accounts := []*testAccount{}
-	for i := byte(0); i < 255; i++ {
+	for i := byte(0); i < 96; i++ {
 		obj := state.GetOrNewStateObject(common.BytesToAddress([]byte{i}))
 		acc := &testAccount{address: common.BytesToAddress([]byte{i})}
 
@@ -61,6 +63,9 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) {
 	}
 	root, _ := state.Commit()
 
+	// Remove any potentially cached data from the test state creation
+	trie.ClearGlobalCache()
+
 	// Return the generated state
 	return db, root, accounts
 }
@@ -68,9 +73,18 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) {
 // checkStateAccounts cross references a reconstructed state with an expected
 // account array.
 func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) {
-	state, _ := New(root, db)
-	for i, acc := range accounts {
+	// Remove any potentially cached data from the state synchronisation
+	trie.ClearGlobalCache()
 
+	// Check root availability and state contents
+	state, err := New(root, db)
+	if err != nil {
+		t.Fatalf("failed to create state trie at %x: %v", root, err)
+	}
+	if err := checkStateConsistency(db, root); err != nil {
+		t.Fatalf("inconsistent state trie at %x: %v", root, err)
+	}
+	for i, acc := range accounts {
 		if balance := state.GetBalance(acc.address); balance.Cmp(acc.balance) != 0 {
 			t.Errorf("account %d: balance mismatch: have %v, want %v", i, balance, acc.balance)
 		}
@@ -83,6 +97,31 @@ func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accou
 	}
 }
 
+// checkStateConsistency checks that all nodes in a state trie and indeed present.
+func checkStateConsistency(db ethdb.Database, root common.Hash) (failure error) {
+	// Capture any panics by the iterator
+	defer func() {
+		if r := recover(); r != nil {
+			failure = fmt.Errorf("%v", r)
+		}
+	}()
+	// Remove any potentially cached data from the test state creation or previous checks
+	trie.ClearGlobalCache()
+
+	// Create and iterate a state trie rooted in a sub-node
+	if _, err := db.Get(root.Bytes()); err != nil {
+		return
+	}
+	state, err := New(root, db)
+	if err != nil {
+		return
+	}
+	it := NewNodeIterator(state)
+	for it.Next() {
+	}
+	return nil
+}
+
 // Tests that an empty state is not scheduled for syncing.
 func TestEmptyStateSync(t *testing.T) {
 	empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
@@ -236,3 +275,65 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
 	// Cross check that the two states are in sync
 	checkStateAccounts(t, dstDb, srcRoot, srcAccounts)
 }
+
+// Tests that at any point in time during a sync, only complete sub-tries are in
+// the database.
+func TestIncompleteStateSync(t *testing.T) {
+	// Create a random state to copy
+	srcDb, srcRoot, srcAccounts := makeTestState()
+
+	// Create a destination state and sync with the scheduler
+	dstDb, _ := ethdb.NewMemDatabase()
+	sched := NewStateSync(srcRoot, dstDb)
+
+	added := []common.Hash{}
+	queue := append([]common.Hash{}, sched.Missing(1)...)
+	for len(queue) > 0 {
+		// Fetch a batch of state nodes
+		results := make([]trie.SyncResult, len(queue))
+		for i, hash := range queue {
+			data, err := srcDb.Get(hash.Bytes())
+			if err != nil {
+				t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
+			}
+			results[i] = trie.SyncResult{hash, data}
+		}
+		// Process each of the state nodes
+		if index, err := sched.Process(results); err != nil {
+			t.Fatalf("failed to process result #%d: %v", index, err)
+		}
+		for _, result := range results {
+			added = append(added, result.Hash)
+		}
+		// Check that all known sub-tries in the synced state is complete
+		for _, root := range added {
+			// Skim through the accounts and make sure the root hash is not a code node
+			codeHash := false
+			for _, acc := range srcAccounts {
+				if bytes.Compare(root.Bytes(), crypto.Sha3(acc.code)) == 0 {
+					codeHash = true
+					break
+				}
+			}
+			// If the root is a real trie node, check consistency
+			if !codeHash {
+				if err := checkStateConsistency(dstDb, root); err != nil {
+					t.Fatalf("state inconsistent: %v", err)
+				}
+			}
+		}
+		// Fetch the next batch to retrieve
+		queue = append(queue[:0], sched.Missing(1)...)
+	}
+	// Sanity check that removing any node from the database is detected
+	for _, node := range added[1:] {
+		key := node.Bytes()
+		value, _ := dstDb.Get(key)
+
+		dstDb.Delete(key)
+		if err := checkStateConsistency(dstDb, added[0]); err == nil {
+			t.Fatalf("trie inconsistency not caught, missing: %x", key)
+		}
+		dstDb.Put(key, value)
+	}
+}

+ 117 - 3
trie/iterator.go

@@ -18,22 +18,26 @@ package trie
 
 import (
 	"bytes"
+	"fmt"
 
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/logger/glog"
 )
 
+// Iterator is a key-value trie iterator to traverse the data contents.
 type Iterator struct {
 	trie *Trie
 
-	Key   []byte
-	Value []byte
+	Key   []byte // Current data key on which the iterator is positioned on
+	Value []byte // Current data value on which the iterator is positioned on
 }
 
+// NewIterator creates a new key-value iterator.
 func NewIterator(trie *Trie) *Iterator {
 	return &Iterator{trie: trie, Key: nil}
 }
 
+// Next moves the iterator forward with one key-value entry.
 func (self *Iterator) Next() bool {
 	isIterStart := false
 	if self.Key == nil {
@@ -142,6 +146,116 @@ func (self *Iterator) key(node interface{}) []byte {
 		}
 		return self.key(rn)
 	}
-
 	return nil
 }
+
+// nodeIteratorState represents the iteration state at one particular node of the
+// trie, which can be resumed at a later invocation.
+type nodeIteratorState struct {
+	node  node // Trie node being iterated
+	child int  // Child to be processed next
+}
+
+// NodeIterator is an iterator to traverse the trie post-order.
+type NodeIterator struct {
+	trie  *Trie                // Trie being iterated
+	stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state
+
+	Node     node   // Current node being iterated (internal representation)
+	Leaf     bool   // Flag whether the current node is a value (data) node
+	LeafBlob []byte // Data blob contained within a leaf (otherwise nil)
+}
+
+// NewNodeIterator creates an post-order trie iterator.
+func NewNodeIterator(trie *Trie) *NodeIterator {
+	if bytes.Compare(trie.Root(), emptyRoot.Bytes()) == 0 {
+		return new(NodeIterator)
+	}
+	return &NodeIterator{trie: trie}
+}
+
+// Next moves the iterator to the next node, returning whether there are any
+// further nodes.
+func (it *NodeIterator) Next() bool {
+	it.step()
+	return it.retrieve()
+}
+
+// step moves the iterator to the next node of the trie.
+func (it *NodeIterator) step() {
+	// Abort if we reached the end of the iteration
+	if it.trie == nil {
+		return
+	}
+	// Initialize the iterator if we've just started, or pop off the old node otherwise
+	if len(it.stack) == 0 {
+		it.stack = append(it.stack, &nodeIteratorState{node: it.trie.root, child: -1})
+		if it.stack[0].node == nil {
+			panic(fmt.Sprintf("root node missing: %x", it.trie.Root()))
+		}
+	} else {
+		it.stack = it.stack[:len(it.stack)-1]
+		if len(it.stack) == 0 {
+			it.trie = nil
+			return
+		}
+	}
+	// Continue iteration to the next child
+	for {
+		parent := it.stack[len(it.stack)-1]
+		if node, ok := parent.node.(fullNode); ok {
+			// Full node, traverse all children, then the node itself
+			if parent.child >= len(node) {
+				break
+			}
+			for parent.child++; parent.child < len(node); parent.child++ {
+				if current := node[parent.child]; current != nil {
+					it.stack = append(it.stack, &nodeIteratorState{node: current, child: -1})
+					break
+				}
+			}
+		} else if node, ok := parent.node.(shortNode); ok {
+			// Short node, traverse the pointer singleton child, then the node itself
+			if parent.child >= 0 {
+				break
+			}
+			parent.child++
+			it.stack = append(it.stack, &nodeIteratorState{node: node.Val, child: -1})
+		} else if node, ok := parent.node.(hashNode); ok {
+			// Hash node, resolve the hash child from the database, then the node itself
+			if parent.child >= 0 {
+				break
+			}
+			parent.child++
+
+			node, err := it.trie.resolveHash(node, nil, nil)
+			if err != nil {
+				panic(err)
+			}
+			it.stack = append(it.stack, &nodeIteratorState{node: node, child: -1})
+		} else {
+			break
+		}
+	}
+}
+
+// retrieve pulls and caches the current trie node the iterator is traversing.
+// In case of a value node, the additional leaf blob is also populated with the
+// data contents for external interpretation.
+//
+// The method returns whether there are any more data left for inspection.
+func (it *NodeIterator) retrieve() bool {
+	// Clear out any previously set values
+	it.Node, it.Leaf, it.LeafBlob = nil, false, nil
+
+	// If the iteration's done, return no available data
+	if it.trie == nil {
+		return false
+	}
+	// Otherwise retrieve the current node and resolve leaf accessors
+	it.Node = it.stack[len(it.stack)-1].node
+	if value, ok := it.Node.(valueNode); ok {
+		it.Leaf, it.LeafBlob = true, []byte(value)
+	}
+	return true
+}

+ 97 - 5
trie/sync_test.go

@@ -18,6 +18,7 @@ package trie
 
 import (
 	"bytes"
+	"fmt"
 	"testing"
 
 	"github.com/ethereum/go-ethereum/common"
@@ -33,6 +34,7 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) {
 	// Fill it with some arbitrary data
 	content := make(map[string][]byte)
 	for i := byte(0); i < 255; i++ {
+		// Map the same data under multiple keys
 		key, val := common.LeftPadBytes([]byte{1, i}, 32), []byte{i}
 		content[string(key)] = val
 		trie.Update(key, val)
@@ -40,9 +42,19 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) {
 		key, val = common.LeftPadBytes([]byte{2, i}, 32), []byte{i}
 		content[string(key)] = val
 		trie.Update(key, val)
+
+		// Add some other data to inflate th trie
+		for j := byte(3); j < 13; j++ {
+			key, val = common.LeftPadBytes([]byte{j, i}, 32), []byte{j, i}
+			content[string(key)] = val
+			trie.Update(key, val)
+		}
 	}
 	trie.Commit()
 
+	// Remove any potentially cached data from the test trie creation
+	globalCache.Clear()
+
 	// Return the generated trie
 	return db, trie, content
 }
@@ -50,10 +62,17 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) {
 // checkTrieContents cross references a reconstructed trie with an expected data
 // content map.
 func checkTrieContents(t *testing.T, db Database, root []byte, content map[string][]byte) {
+	// Remove any potentially cached data from the trie synchronisation
+	globalCache.Clear()
+
+	// Check root availability and trie contents
 	trie, err := New(common.BytesToHash(root), db)
 	if err != nil {
 		t.Fatalf("failed to create trie at %x: %v", root, err)
 	}
+	if err := checkTrieConsistency(db, common.BytesToHash(root)); err != nil {
+		t.Fatalf("inconsistent trie at %x: %v", root, err)
+	}
 	for key, val := range content {
 		if have := trie.Get([]byte(key)); bytes.Compare(have, val) != 0 {
 			t.Errorf("entry %x: content mismatch: have %x, want %x", key, have, val)
@@ -61,6 +80,28 @@ func checkTrieContents(t *testing.T, db Database, root []byte, content map[strin
 	}
 }
 
+// checkTrieConsistency checks that all nodes in a trie and indeed present.
+func checkTrieConsistency(db Database, root common.Hash) (failure error) {
+	// Capture any panics by the iterator
+	defer func() {
+		if r := recover(); r != nil {
+			failure = fmt.Errorf("%v", r)
+		}
+	}()
+	// Remove any potentially cached data from the test trie creation or previous checks
+	globalCache.Clear()
+
+	// Create and iterate a trie rooted in a subnode
+	trie, err := New(root, db)
+	if err != nil {
+		return
+	}
+	it := NewNodeIterator(trie)
+	for it.Next() {
+	}
+	return nil
+}
+
 // Tests that an empty trie is not scheduled for syncing.
 func TestEmptyTrieSync(t *testing.T) {
 	emptyA, _ := New(common.Hash{}, nil)
@@ -102,7 +143,7 @@ func testIterativeTrieSync(t *testing.T, batch int) {
 		}
 		queue = append(queue[:0], sched.Missing(batch)...)
 	}
-	// Cross check that the two tries re in sync
+	// Cross check that the two tries are in sync
 	checkTrieContents(t, dstDb, srcTrie.Root(), srcData)
 }
 
@@ -132,7 +173,7 @@ func TestIterativeDelayedTrieSync(t *testing.T) {
 		}
 		queue = append(queue[len(results):], sched.Missing(10000)...)
 	}
-	// Cross check that the two tries re in sync
+	// Cross check that the two tries are in sync
 	checkTrieContents(t, dstDb, srcTrie.Root(), srcData)
 }
 
@@ -173,7 +214,7 @@ func testIterativeRandomTrieSync(t *testing.T, batch int) {
 			queue[hash] = struct{}{}
 		}
 	}
-	// Cross check that the two tries re in sync
+	// Cross check that the two tries are in sync
 	checkTrieContents(t, dstDb, srcTrie.Root(), srcData)
 }
 
@@ -216,7 +257,7 @@ func TestIterativeRandomDelayedTrieSync(t *testing.T) {
 			queue[hash] = struct{}{}
 		}
 	}
-	// Cross check that the two tries re in sync
+	// Cross check that the two tries are in sync
 	checkTrieContents(t, dstDb, srcTrie.Root(), srcData)
 }
 
@@ -252,6 +293,57 @@ func TestDuplicateAvoidanceTrieSync(t *testing.T) {
 		}
 		queue = append(queue[:0], sched.Missing(0)...)
 	}
-	// Cross check that the two tries re in sync
+	// Cross check that the two tries are in sync
 	checkTrieContents(t, dstDb, srcTrie.Root(), srcData)
 }
+
+// Tests that at any point in time during a sync, only complete sub-tries are in
+// the database.
+func TestIncompleteTrieSync(t *testing.T) {
+	// Create a random trie to copy
+	srcDb, srcTrie, _ := makeTestTrie()
+
+	// Create a destination trie and sync with the scheduler
+	dstDb, _ := ethdb.NewMemDatabase()
+	sched := NewTrieSync(common.BytesToHash(srcTrie.Root()), dstDb, nil)
+
+	added := []common.Hash{}
+	queue := append([]common.Hash{}, sched.Missing(1)...)
+	for len(queue) > 0 {
+		// Fetch a batch of trie nodes
+		results := make([]SyncResult, len(queue))
+		for i, hash := range queue {
+			data, err := srcDb.Get(hash.Bytes())
+			if err != nil {
+				t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
+			}
+			results[i] = SyncResult{hash, data}
+		}
+		// Process each of the trie nodes
+		if index, err := sched.Process(results); err != nil {
+			t.Fatalf("failed to process result #%d: %v", index, err)
+		}
+		for _, result := range results {
+			added = append(added, result.Hash)
+		}
+		// Check that all known sub-tries in the synced trie is complete
+		for _, root := range added {
+			if err := checkTrieConsistency(dstDb, root); err != nil {
+				t.Fatalf("trie inconsistent: %v", err)
+			}
+		}
+		// Fetch the next batch to retrieve
+		queue = append(queue[:0], sched.Missing(1)...)
+	}
+	// Sanity check that removing any node from the database is detected
+	for _, node := range added[1:] {
+		key := node.Bytes()
+		value, _ := dstDb.Get(key)
+
+		dstDb.Delete(key)
+		if err := checkTrieConsistency(dstDb, added[0]); err == nil {
+			t.Fatalf("trie inconsistency not caught, missing: %x", key)
+		}
+		dstDb.Put(key, value)
+	}
+}