Bläddra i källkod

core/state/snapshot, trie: reuse dirty data instead of hitting disk when generating (#22667)

* core/state/snapshot: reuse memory data instead of hitting disk when generating

* trie: minor nitpicks wrt the resolver optimization

* core/state/snapshot, trie: use key/value store for resolver

* trie: fix linter

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
Martin Holst Swende 4 år sedan
förälder
incheckning
49281ab84f
2 ändrade filer med 64 tillägg och 6 borttagningar
  1. 19 1
      core/state/snapshot/generate.go
  2. 45 5
      trie/iterator.go

+ 19 - 1
core/state/snapshot/generate.go

@@ -31,6 +31,7 @@ import (
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/ethdb/memorydb"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/metrics"
 	"github.com/ethereum/go-ethereum/rlp"
@@ -434,6 +435,20 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
 		}
 		meter.Mark(1)
 	}
+
+	// We use the snap data to build up a cache which can be used by the
+	// main account trie as a primary lookup when resolving hashes
+	var snapNodeCache ethdb.KeyValueStore
+	if len(result.keys) > 0 {
+		snapNodeCache = memorydb.New()
+		snapTrieDb := trie.NewDatabase(snapNodeCache)
+		snapTrie, _ := trie.New(common.Hash{}, snapTrieDb)
+		for i, key := range result.keys {
+			snapTrie.Update(key, result.vals[i])
+		}
+		root, _ := snapTrie.Commit(nil)
+		snapTrieDb.Commit(root, false, nil)
+	}
 	tr := result.tr
 	if tr == nil {
 		tr, err = trie.New(root, dl.triedb)
@@ -442,9 +457,11 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
 			return false, nil, errMissingTrie
 		}
 	}
+
 	var (
 		trieMore       bool
-		iter           = trie.NewIterator(tr.NodeIterator(origin))
+		nodeIt         = tr.NodeIterator(origin)
+		iter           = trie.NewIterator(nodeIt)
 		kvkeys, kvvals = result.keys, result.vals
 
 		// counters
@@ -458,6 +475,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string,
 		start    = time.Now()
 		internal time.Duration
 	)
+	nodeIt.AddResolver(snapNodeCache)
 	for iter.Next() {
 		if last != nil && bytes.Compare(iter.Key, last) > 0 {
 			trieMore = true

+ 45 - 5
trie/iterator.go

@@ -22,6 +22,7 @@ import (
 	"errors"
 
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/rlp"
 )
 
@@ -102,6 +103,19 @@ type NodeIterator interface {
 	// iterator is not positioned at a leaf. Callers must not retain references
 	// to the value after calling Next.
 	LeafProof() [][]byte
+
+	// AddResolver sets an intermediate database to use for looking up trie nodes
+	// before reaching into the real persistent layer.
+	//
+	// This is not required for normal operation, rather is an optimization for
+	// cases where trie nodes can be recovered from some external mechanism without
+	// reading from disk. In those cases, this resolver allows short circuiting
+	// accesses and returning them from memory.
+	//
+	// Before adding a similar mechanism to any other place in Geth, consider
+	// making trie.Database an interface and wrapping at that level. It's a huge
+	// refactor, but it could be worth it if another occurrence arises.
+	AddResolver(ethdb.KeyValueStore)
 }
 
 // nodeIteratorState represents the iteration state at one particular node of the
@@ -119,6 +133,8 @@ type nodeIterator struct {
 	stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state
 	path  []byte               // Path to the current node
 	err   error                // Failure set in case of an internal error in the iterator
+
+	resolver ethdb.KeyValueStore // Optional intermediate resolver above the disk layer
 }
 
 // errIteratorEnd is stored in nodeIterator.err when iteration is done.
@@ -143,6 +159,10 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator {
 	return it
 }
 
+func (it *nodeIterator) AddResolver(resolver ethdb.KeyValueStore) { // AddResolver records the optional key/value store that resolveHash consults before the trie.
+	it.resolver = resolver // stored as-is; a nil resolver makes resolveHash skip the intermediate lookup
+}
+
 func (it *nodeIterator) Hash() common.Hash {
 	if len(it.stack) == 0 {
 		return common.Hash{}
@@ -262,7 +282,7 @@ func (it *nodeIterator) init() (*nodeIteratorState, error) {
 	if root != emptyRoot {
 		state.hash = root
 	}
-	return state, state.resolve(it.trie, nil)
+	return state, state.resolve(it, nil)
 }
 
 // peek creates the next state of the iterator.
@@ -286,7 +306,7 @@ func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, er
 		}
 		state, path, ok := it.nextChild(parent, ancestor)
 		if ok {
-			if err := state.resolve(it.trie, path); err != nil {
+			if err := state.resolve(it, path); err != nil {
 				return parent, &parent.index, path, err
 			}
 			return state, &parent.index, path, nil
@@ -319,7 +339,7 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by
 		}
 		state, path, ok := it.nextChildAt(parent, ancestor, seekKey)
 		if ok {
-			if err := state.resolve(it.trie, path); err != nil {
+			if err := state.resolve(it, path); err != nil {
 				return parent, &parent.index, path, err
 			}
 			return state, &parent.index, path, nil
@@ -330,9 +350,21 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by
 	return nil, nil, nil, errIteratorEnd
 }
 
-func (st *nodeIteratorState) resolve(tr *Trie, path []byte) error {
+func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { // resolveHash loads the node for hash, trying the optional resolver first and the trie second.
+	if it.resolver != nil { // only set when a caller attached one through AddResolver
+		if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 { // a lookup error or an empty blob counts as a miss
+			if resolved, err := decodeNode(hash, blob); err == nil { // a blob that fails to decode is ignored rather than reported
+				return resolved, nil
+			}
+		}
+	}
+	resolved, err := it.trie.resolveHash(hash, path) // fallback: every resolver miss above ends up here; path is only used by this lookup
+	return resolved, err
+}
+
+func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {
 	if hash, ok := st.node.(hashNode); ok {
-		resolved, err := tr.resolveHash(hash, path)
+		resolved, err := it.resolveHash(hash, path)
 		if err != nil {
 			return err
 		}
@@ -517,6 +549,10 @@ func (it *differenceIterator) Path() []byte {
 	return it.b.Path()
 }
 
+func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueStore) { // AddResolver exists on differenceIterator but is unsupported: it always panics.
+	panic("not implemented") // the resolver argument is never used
+}
+
 func (it *differenceIterator) Next(bool) bool {
 	// Invariants:
 	// - We always advance at least one element in b.
@@ -624,6 +660,10 @@ func (it *unionIterator) Path() []byte {
 	return (*it.items)[0].Path()
 }
 
+func (it *unionIterator) AddResolver(resolver ethdb.KeyValueStore) { // AddResolver exists on unionIterator but is unsupported: it always panics.
+	panic("not implemented") // the resolver argument is never used
+}
+
 // Next returns the next node in the union of tries being iterated over.
 //
 // It does this by maintaining a heap of iterators, sorted by the iteration