
core, eth: faster snapshot generation (#22504)

* eth/protocols: persist received state segments

* core: initial implementation

* core/state/snapshot: add tests

* core, eth: updates

* eth/protocols/snapshot: count flat state size

* core/state: add metrics

* core/state/snapshot: skip unnecessary deletion

* core/state/snapshot: rename

* core/state/snapshot: use the global batch

* core/state/snapshot: add logs and fix wiping

* core/state/snapshot: fix

* core/state/snapshot: save generation progress even if the batch is empty

* core/state/snapshot: fixes

* core/state/snapshot: fix initial account range length

* core/state/snapshot: fix initial account range

* eth/protocols/snap: store flat states during the healing

* eth/protocols/snap: print logs

* core/state/snapshot: refactor (#4)

* core/state/snapshot: refactor

* core/state/snapshot: tiny fix and polish

Co-authored-by: rjl493456442 <garyrong0905@gmail.com>

* core, eth: fixes

* core, eth: fix healing writer

* core, trie, eth: fix paths

* eth/protocols/snap: fix encoding

* eth, core: add debug log

* core/state/generate: release iterator asap (#5)

core/state/snapshot: less copy

core/state/snapshot: revert split loop

core/state/snapshot: handle storage becoming empty, improve test robustness

core/state: test modified codehash

core/state/snapshot: polish

* core/state/snapshot: optimize stats counter

* core, eth: add metric

* core/state/snapshot: update comments

* core/state/snapshot: improve tests

* core/state/snapshot: replace secure trie with standard trie

* core/state/snapshot: wrap return as the struct

* core/state/snapshot: skip wiping correct states

* core/state/snapshot: updates

* core/state/snapshot: fixes

* core/state/snapshot: fix panic due to reference flaw in closure

* core/state/snapshot: fix errors in state generation logic + fix log output

* core/state/snapshot: remove an error case

* core/state/snapshot: fix condition-check for exhausted snap state

* core/state/snapshot: use stackTrie for small tries

* core/state/snapshot: don't resolve small storage tries in vain

* core/state/snapshot: properly clean up storage of deleted accounts

* core/state/snapshot: avoid RLP-encoding in some cases + minor nitpicks

* core/state/snapshot: fix error (+testcase)

* core/state/snapshot: clean up tests a bit

* core/state/snapshot: work in progress on better tests

* core/state/snapshot: polish code

* core/state/snapshot: fix trie iteration abortion trigger

* core/state/snapshot: fixes flaws

* core/state/snapshot: remove panic

* core/state/snapshot: fix abort

* core/state/snapshot: more tests (plus failing testcase)

* core/state/snapshot: more testcases + fix for failing test

* core/state/snapshot: testcase for malformed data

* core/state/snapshot: some test nitpicks

* core/state/snapshot: improvements to logging

* core/state/snapshot: testcase to demo error in abortion

* core/state/snapshot: fix abortion

* cmd/geth: make verify-state report the root

* trie: fix failing test

* core/state/snapshot: add timer metrics

* core/state/snapshot: fix metrics

* core/state/snapshot: update tests

* eth/protocols/snap: write snapshot account even if code or state is needed

* core/state/snapshot: fix diskmore check

* core/state/snapshot: review fixes

* core/state/snapshot: improve error message

* cmd/geth: rename 'error' to 'err' in logs

* core/state/snapshot: fix some review concerns

* core/state/snapshot, eth/protocols/snap: clear snapshot marker when starting/resuming snap sync

* core: add error log

* core/state/snapshot: use proper timers for metrics collection

* core/state/snapshot: address some review concerns

* eth/protocols/snap: improved log message

* eth/protocols/snap: fix heal logs to condense infos

* core/state/snapshot: wait for generator termination before restarting

* core/state/snapshot: revert timers to counters to track total time

Co-authored-by: Martin Holst Swende <martin@swende.se>
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
gary rong, 4 years ago
Commit 7088f1e814

+ 19 - 19
cmd/geth/snapshot.go

@@ -155,7 +155,7 @@ func pruneState(ctx *cli.Context) error {
 	chaindb := utils.MakeChainDatabase(ctx, stack, false)
 	pruner, err := pruner.NewPruner(chaindb, stack.ResolvePath(""), stack.ResolvePath(config.Eth.TrieCleanCacheJournal), ctx.GlobalUint64(utils.BloomFilterSizeFlag.Name))
 	if err != nil {
-		log.Error("Failed to open snapshot tree", "error", err)
+		log.Error("Failed to open snapshot tree", "err", err)
 		return err
 	}
 	if ctx.NArg() > 1 {
@@ -166,12 +166,12 @@ func pruneState(ctx *cli.Context) error {
 	if ctx.NArg() == 1 {
 		targetRoot, err = parseRoot(ctx.Args()[0])
 		if err != nil {
-			log.Error("Failed to resolve state root", "error", err)
+			log.Error("Failed to resolve state root", "err", err)
 			return err
 		}
 	}
 	if err = pruner.Prune(targetRoot); err != nil {
-		log.Error("Failed to prune state", "error", err)
+		log.Error("Failed to prune state", "err", err)
 		return err
 	}
 	return nil
@@ -189,7 +189,7 @@ func verifyState(ctx *cli.Context) error {
 	}
 	snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, headBlock.Root(), false, false, false)
 	if err != nil {
-		log.Error("Failed to open snapshot tree", "error", err)
+		log.Error("Failed to open snapshot tree", "err", err)
 		return err
 	}
 	if ctx.NArg() > 1 {
@@ -200,15 +200,15 @@ func verifyState(ctx *cli.Context) error {
 	if ctx.NArg() == 1 {
 		root, err = parseRoot(ctx.Args()[0])
 		if err != nil {
-			log.Error("Failed to resolve state root", "error", err)
+			log.Error("Failed to resolve state root", "err", err)
 			return err
 		}
 	}
 	if err := snaptree.Verify(root); err != nil {
-		log.Error("Failed to verfiy state", "error", err)
+		log.Error("Failed to verfiy state", "root", root, "err", err)
 		return err
 	}
-	log.Info("Verified the state")
+	log.Info("Verified the state", "root", root)
 	return nil
 }
 
@@ -236,7 +236,7 @@ func traverseState(ctx *cli.Context) error {
 	if ctx.NArg() == 1 {
 		root, err = parseRoot(ctx.Args()[0])
 		if err != nil {
-			log.Error("Failed to resolve state root", "error", err)
+			log.Error("Failed to resolve state root", "err", err)
 			return err
 		}
 		log.Info("Start traversing the state", "root", root)
@@ -247,7 +247,7 @@ func traverseState(ctx *cli.Context) error {
 	triedb := trie.NewDatabase(chaindb)
 	t, err := trie.NewSecure(root, triedb)
 	if err != nil {
-		log.Error("Failed to open trie", "root", root, "error", err)
+		log.Error("Failed to open trie", "root", root, "err", err)
 		return err
 	}
 	var (
@@ -262,13 +262,13 @@ func traverseState(ctx *cli.Context) error {
 		accounts += 1
 		var acc state.Account
 		if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil {
-			log.Error("Invalid account encountered during traversal", "error", err)
+			log.Error("Invalid account encountered during traversal", "err", err)
 			return err
 		}
 		if acc.Root != emptyRoot {
 			storageTrie, err := trie.NewSecure(acc.Root, triedb)
 			if err != nil {
-				log.Error("Failed to open storage trie", "root", acc.Root, "error", err)
+				log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
 				return err
 			}
 			storageIter := trie.NewIterator(storageTrie.NodeIterator(nil))
@@ -276,7 +276,7 @@ func traverseState(ctx *cli.Context) error {
 				slots += 1
 			}
 			if storageIter.Err != nil {
-				log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Err)
+				log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Err)
 				return storageIter.Err
 			}
 		}
@@ -294,7 +294,7 @@ func traverseState(ctx *cli.Context) error {
 		}
 	}
 	if accIter.Err != nil {
-		log.Error("Failed to traverse state trie", "root", root, "error", accIter.Err)
+		log.Error("Failed to traverse state trie", "root", root, "err", accIter.Err)
 		return accIter.Err
 	}
 	log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))
@@ -326,7 +326,7 @@ func traverseRawState(ctx *cli.Context) error {
 	if ctx.NArg() == 1 {
 		root, err = parseRoot(ctx.Args()[0])
 		if err != nil {
-			log.Error("Failed to resolve state root", "error", err)
+			log.Error("Failed to resolve state root", "err", err)
 			return err
 		}
 		log.Info("Start traversing the state", "root", root)
@@ -337,7 +337,7 @@ func traverseRawState(ctx *cli.Context) error {
 	triedb := trie.NewDatabase(chaindb)
 	t, err := trie.NewSecure(root, triedb)
 	if err != nil {
-		log.Error("Failed to open trie", "root", root, "error", err)
+		log.Error("Failed to open trie", "root", root, "err", err)
 		return err
 	}
 	var (
@@ -368,13 +368,13 @@ func traverseRawState(ctx *cli.Context) error {
 			accounts += 1
 			var acc state.Account
 			if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
-				log.Error("Invalid account encountered during traversal", "error", err)
+				log.Error("Invalid account encountered during traversal", "err", err)
 				return errors.New("invalid account")
 			}
 			if acc.Root != emptyRoot {
 				storageTrie, err := trie.NewSecure(acc.Root, triedb)
 				if err != nil {
-					log.Error("Failed to open storage trie", "root", acc.Root, "error", err)
+					log.Error("Failed to open storage trie", "root", acc.Root, "err", err)
 					return errors.New("missing storage trie")
 				}
 				storageIter := storageTrie.NodeIterator(nil)
@@ -397,7 +397,7 @@ func traverseRawState(ctx *cli.Context) error {
 					}
 				}
 				if storageIter.Error() != nil {
-					log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Error())
+					log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Error())
 					return storageIter.Error()
 				}
 			}
@@ -416,7 +416,7 @@ func traverseRawState(ctx *cli.Context) error {
 		}
 	}
 	if accIter.Error() != nil {
-		log.Error("Failed to traverse state trie", "root", root, "error", accIter.Error())
+		log.Error("Failed to traverse state trie", "root", root, "err", accIter.Error())
 		return accIter.Error()
 	}
 	log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start)))

+ 1 - 1
core/state/snapshot/conversion.go

@@ -322,7 +322,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash,
 						return
 					}
 					if !bytes.Equal(account.Root, subroot.Bytes()) {
-						results <- fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot)
+						results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot)
 						return
 					}
 					results <- nil
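
The bulk of this commit is in core/state/snapshot/generate.go (shown after the diffstat below): rather than wiping the flat state and re-iterating the whole trie, the generator now walks the existing snapshot in bounded key ranges, keeps any range that passes a Merkle range proof against the state root, and re-iterates the trie only for the ranges that fail. The following is a minimal, self-contained Go sketch of that control flow only; verifyRange and onState are hypothetical stand-ins for the real proveRange/onStateCallback machinery, and increaseKey mirrors the helper introduced by the commit.

package main

import (
	"bytes"
	"fmt"
)

// increaseKey increments a big-endian key by one, mirroring the helper added
// in generate.go. It returns nil when the key overflows (all bytes are 0xff).
func increaseKey(key []byte) []byte {
	for i := len(key) - 1; i >= 0; i-- {
		key[i]++
		if key[i] != 0x0 {
			return key
		}
	}
	return nil
}

// generateState walks the sorted key space in chunks of at most max keys.
// verifyRange stands in for the range proof: if it reports the chunk as valid,
// the chunk is skipped wholesale; otherwise onState is invoked to regenerate
// every entry of the failing range.
func generateState(keys [][]byte, max int, verifyRange func([][]byte) bool, onState func([]byte)) {
	origin := []byte{0x00}
	for {
		// Collect the next chunk of at most max keys at or after origin.
		var (
			chunk [][]byte
			more  bool
		)
		for _, k := range keys {
			if bytes.Compare(k, origin) < 0 {
				continue
			}
			if len(chunk) == max {
				more = true // more flat state remains in the next segment
				break
			}
			chunk = append(chunk, k)
		}
		if !verifyRange(chunk) {
			for _, k := range chunk {
				onState(k) // regenerate only the range that failed the proof
			}
		}
		if !more || len(chunk) == 0 {
			return // the key space is exhausted
		}
		last := append([]byte{}, chunk[len(chunk)-1]...)
		if origin = increaseKey(last); origin == nil {
			return // special case, the last key is 0xff..ff
		}
	}
}

func main() {
	keys := [][]byte{{0x01}, {0x02}, {0x03}, {0x04}, {0x05}}
	generateState(keys, 2,
		func(chunk [][]byte) bool { return len(chunk) < 2 }, // pretend that full chunks fail the proof
		func(k []byte) { fmt.Printf("regenerating key %x\n", k) },
	)
}
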

+ 548 - 131
core/state/snapshot/generate.go

@@ -19,17 +19,20 @@ package snapshot
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"math/big"
 	"time"
 
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/common/hexutil"
 	"github.com/ethereum/go-ethereum/common/math"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/metrics"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
 )
@@ -40,17 +43,63 @@ var (
 
 	// emptyCode is the known hash of the empty EVM bytecode.
 	emptyCode = crypto.Keccak256Hash(nil)
+
+	// accountCheckRange is the upper limit of the number of accounts involved in
+	// each range check. This is a value estimated based on experience. If this
+	// value is too large, the failure rate of range proving will increase. Conversely,
+	// if the value is too small, the efficiency of the state recovery will decrease.
+	accountCheckRange = 128
+
+	// storageCheckRange is the upper limit of the number of storage slots involved
+	// in each range check. This is a value estimated based on experience. If this
+	// value is too large, the failure rate of range proving will increase. Conversely,
+	// if the value is too small, the efficiency of the state recovery will decrease.
+	storageCheckRange = 1024
+
+	// errMissingTrie is returned if the target trie is missing while the generation
+	// is running. In this case the generation is aborted and waits for a new signal.
+	errMissingTrie = errors.New("missing trie")
+)
+
+// Metrics in generation
+var (
+	snapGeneratedAccountMeter     = metrics.NewRegisteredMeter("state/snapshot/generation/account/generated", nil)
+	snapRecoveredAccountMeter     = metrics.NewRegisteredMeter("state/snapshot/generation/account/recovered", nil)
+	snapWipedAccountMeter         = metrics.NewRegisteredMeter("state/snapshot/generation/account/wiped", nil)
+	snapMissallAccountMeter       = metrics.NewRegisteredMeter("state/snapshot/generation/account/missall", nil)
+	snapGeneratedStorageMeter     = metrics.NewRegisteredMeter("state/snapshot/generation/storage/generated", nil)
+	snapRecoveredStorageMeter     = metrics.NewRegisteredMeter("state/snapshot/generation/storage/recovered", nil)
+	snapWipedStorageMeter         = metrics.NewRegisteredMeter("state/snapshot/generation/storage/wiped", nil)
+	snapMissallStorageMeter       = metrics.NewRegisteredMeter("state/snapshot/generation/storage/missall", nil)
+	snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil)
+	snapFailedRangeProofMeter     = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil)
+
+	// snapAccountProveCounter measures time spent on the account proving
+	snapAccountProveCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/prove", nil)
+	// snapAccountTrieReadCounter measures time spent on the account trie iteration
+	snapAccountTrieReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/trieread", nil)
+	// snapAccountSnapReadCounter measures time spent on the snapshot account iteration
+	snapAccountSnapReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/snapread", nil)
+	// snapAccountWriteCounter measures time spent on writing/updating/deleting accounts
+	snapAccountWriteCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/write", nil)
+	// snapStorageProveCounter measures time spent on storage proving
+	snapStorageProveCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/prove", nil)
+	// snapStorageTrieReadCounter measures time spent on the storage trie iteration
+	snapStorageTrieReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/trieread", nil)
+	// snapStorageSnapReadCounter measures time spent on the snapshot storage iteration
+	snapStorageSnapReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/snapread", nil)
+	// snapStorageWriteCounter measures time spent on writing/updating/deleting storages
+	snapStorageWriteCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/write", nil)
 )
 
 // generatorStats is a collection of statistics gathered by the snapshot generator
 // for logging purposes.
 type generatorStats struct {
-	wiping   chan struct{}      // Notification channel if wiping is in progress
 	origin   uint64             // Origin prefix where generation started
 	start    time.Time          // Timestamp when generation started
-	accounts uint64             // Number of accounts indexed
-	slots    uint64             // Number of storage slots indexed
-	storage  common.StorageSize // Account and storage slot size
+	accounts uint64             // Number of accounts indexed(generated or recovered)
+	slots    uint64             // Number of storage slots indexed(generated or recovered)
+	storage  common.StorageSize // Total account and storage slot size(generation or recovery)
 }
 
 // Log creates an contextual log with the given message and the context pulled
@@ -91,25 +140,30 @@ func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
 	log.Info(msg, ctx...)
 }
 
+// ClearSnapshotMarker sets the snapshot marker to zero, meaning that snapshots
+// are not usable.
+func ClearSnapshotMarker(diskdb ethdb.KeyValueStore) {
+	batch := diskdb.NewBatch()
+	journalProgress(batch, []byte{}, nil)
+	if err := batch.Write(); err != nil {
+		log.Crit("Failed to write initialized state marker", "err", err)
+	}
+}
+
 // generateSnapshot regenerates a brand new snapshot based on an existing state
 // database and head block asynchronously. The snapshot is returned immediately
 // and generation is continued in the background until done.
-func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer {
-	// Wipe any previously existing snapshot from the database if no wiper is
-	// currently in progress.
-	if wiper == nil {
-		wiper = wipeSnapshot(diskdb, true)
-	}
+func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) *diskLayer {
 	// Create a new disk layer with an initialized state marker at zero
 	var (
-		stats     = &generatorStats{wiping: wiper, start: time.Now()}
+		stats     = &generatorStats{start: time.Now()}
 		batch     = diskdb.NewBatch()
 		genMarker = []byte{} // Initialized but empty!
 	)
 	rawdb.WriteSnapshotRoot(batch, root)
 	journalProgress(batch, genMarker, stats)
 	if err := batch.Write(); err != nil {
-		log.Crit("Failed to write initialized state marker", "error", err)
+		log.Crit("Failed to write initialized state marker", "err", err)
 	}
 	base := &diskLayer{
 		diskdb:     diskdb,
@@ -135,7 +189,6 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta
 		Marker: marker,
 	}
 	if stats != nil {
-		entry.Wiping = (stats.wiping != nil)
 		entry.Accounts = stats.accounts
 		entry.Slots = stats.slots
 		entry.Storage = uint64(stats.storage)
@@ -159,169 +212,521 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta
 	rawdb.WriteSnapshotGenerator(db, blob)
 }
 
-// generate is a background thread that iterates over the state and storage tries,
-// constructing the state snapshot. All the arguments are purely for statistics
-// gathering and logging, since the method surfs the blocks as they arrive, often
-// being restarted.
-func (dl *diskLayer) generate(stats *generatorStats) {
-	// If a database wipe is in operation, wait until it's done
-	if stats.wiping != nil {
-		stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker)
-		select {
-		// If wiper is done, resume normal mode of operation
-		case <-stats.wiping:
-			stats.wiping = nil
-			stats.start = time.Now()
+// proofResult contains the output of range proving which can be used
+// for further processing regardless if it is successful or not.
+type proofResult struct {
+	keys     [][]byte   // The key set of all elements being iterated, even if proving failed
+	vals     [][]byte   // The val set of all elements being iterated, even if proving failed
+	diskMore bool       // Set when the database has extra snapshot states since last iteration
+	trieMore bool       // Set when the trie has extra snapshot states (only meaningful for successful proving)
+	proofErr error      // Indicator whether the given state range is valid or not
+	tr       *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil)
+}
 
-		// If generator was aborted during wipe, return
-		case abort := <-dl.genAbort:
-			abort <- stats
-			return
+// valid returns whether the range proof was successful.
+func (result *proofResult) valid() bool {
+	return result.proofErr == nil
+}
+
+// last returns the last verified element key regardless of whether the range proof is
+// successful or not. Nil is returned if nothing was involved in the proving.
+func (result *proofResult) last() []byte {
+	var last []byte
+	if len(result.keys) > 0 {
+		last = result.keys[len(result.keys)-1]
+	}
+	return last
+}
+
+// forEach iterates all the visited elements and applies the given callback on them.
+// The iteration is aborted if the callback returns a non-nil error.
+func (result *proofResult) forEach(callback func(key []byte, val []byte) error) error {
+	for i := 0; i < len(result.keys); i++ {
+		key, val := result.keys[i], result.vals[i]
+		if err := callback(key, val); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// proveRange proves that the snapshot segment with a particular prefix is "valid".
+// The iteration start point will be assigned if the iterator is restored from
+// the last interruption. Max will be assigned in order to limit the maximum
+// amount of data involved in each iteration.
+//
+// The proof result will be returned if the range proving is finished, otherwise
+// the error will be returned to abort the entire procedure.
+func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) {
+	var (
+		keys     [][]byte
+		vals     [][]byte
+		proof    = rawdb.NewMemoryDatabase()
+		diskMore = false
+	)
+	iter := dl.diskdb.NewIterator(prefix, origin)
+	defer iter.Release()
+
+	var start = time.Now()
+	for iter.Next() {
+		key := iter.Key()
+		if len(key) != len(prefix)+common.HashLength {
+			continue
+		}
+		if len(keys) == max {
+			// Break if we've reached the max size, and signal that we're not
+			// done yet.
+			diskMore = true
+			break
+		}
+		keys = append(keys, common.CopyBytes(key[len(prefix):]))
+
+		if valueConvertFn == nil {
+			vals = append(vals, common.CopyBytes(iter.Value()))
+		} else {
+			val, err := valueConvertFn(iter.Value())
+			if err != nil {
+				// Special case, the state data is corrupted (invalid slim-format account),
+				// don't abort the entire procedure directly. Instead, let the fallback
+				// generation heal the invalid data.
+				//
+				// Here append the original value to ensure that the number of keys and
+				// values is the same.
+				vals = append(vals, common.CopyBytes(iter.Value()))
+				log.Error("Failed to convert account state data", "err", err)
+			} else {
+				vals = append(vals, val)
+			}
+		}
+	}
+	// Update metrics for database iteration and merkle proving
+	if kind == "storage" {
+		snapStorageSnapReadCounter.Inc(time.Since(start).Nanoseconds())
+	} else {
+		snapAccountSnapReadCounter.Inc(time.Since(start).Nanoseconds())
+	}
+	defer func(start time.Time) {
+		if kind == "storage" {
+			snapStorageProveCounter.Inc(time.Since(start).Nanoseconds())
+		} else {
+			snapAccountProveCounter.Inc(time.Since(start).Nanoseconds())
+		}
+	}(time.Now())
+
+	// The snap state is exhausted, pass the entire key/val set for verification
+	if origin == nil && !diskMore {
+		stackTr := trie.NewStackTrie(nil)
+		for i, key := range keys {
+			stackTr.TryUpdate(key, common.CopyBytes(vals[i]))
+		}
+		if gotRoot := stackTr.Hash(); gotRoot != root {
+			return &proofResult{
+				keys:     keys,
+				vals:     vals,
+				proofErr: fmt.Errorf("wrong root: have %#x want %#x", gotRoot, root),
+			}, nil
 		}
+		return &proofResult{keys: keys, vals: vals}, nil
 	}
-	// Create an account and state iterator pointing to the current generator marker
-	accTrie, err := trie.NewSecure(dl.root, dl.triedb)
+	// Snap state is chunked, generate edge proofs for verification.
+	tr, err := trie.New(root, dl.triedb)
 	if err != nil {
-		// The account trie is missing (GC), surf the chain until one becomes available
 		stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker)
+		return nil, errMissingTrie
+	}
+	// First, find out the key of the last iterated element.
+	var last []byte
+	if len(keys) > 0 {
+		last = keys[len(keys)-1]
+	}
+	// Generate the Merkle proofs for the first and last element
+	if origin == nil {
+		origin = common.Hash{}.Bytes()
+	}
+	if err := tr.Prove(origin, 0, proof); err != nil {
+		log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err)
+		return &proofResult{
+			keys:     keys,
+			vals:     vals,
+			diskMore: diskMore,
+			proofErr: err,
+			tr:       tr,
+		}, nil
+	}
+	if last != nil {
+		if err := tr.Prove(last, 0, proof); err != nil {
+			log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err)
+			return &proofResult{
+				keys:     keys,
+				vals:     vals,
+				diskMore: diskMore,
+				proofErr: err,
+				tr:       tr,
+			}, nil
+		}
+	}
+	// Verify the snapshot segment with range prover, ensure that all flat states
+	// in this range correspond to the merkle trie.
+	_, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof)
+	return &proofResult{
+			keys:     keys,
+			vals:     vals,
+			diskMore: diskMore,
+			trieMore: cont,
+			proofErr: err,
+			tr:       tr},
+		nil
+}
 
-		abort := <-dl.genAbort
-		abort <- stats
-		return
+// onStateCallback is a function that is called by generateRange, when processing a range of
+// accounts or storage slots. For each element, the callback is invoked.
+// If 'delete' is true, then this element (and potential slots) needs to be deleted from the snapshot.
+// If 'write' is true, then this element needs to be updated with the 'val'.
+// If 'write' is false, then this element is already correct, and needs no update. However,
+// for accounts, the storage trie of the account needs to be checked.
+// The 'val' is the canonical encoding of the value (not the slim format for accounts)
+type onStateCallback func(key []byte, val []byte, write bool, delete bool) error
+
+// generateRange generates the state segment with a particular prefix. Generation can
+// either verify the correctness of existing state through a range proof and skip
+// generation, or iterate the trie to regenerate the state on demand.
+func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) {
+	// Use range prover to check the validity of the flat state in the range
+	result, err := dl.proveRange(stats, root, prefix, kind, origin, max, valueConvertFn)
+	if err != nil {
+		return false, nil, err
 	}
-	stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker)
+	last := result.last()
+
+	// Construct contextual logger
+	logCtx := []interface{}{"kind", kind, "prefix", hexutil.Encode(prefix)}
+	if len(origin) > 0 {
+		logCtx = append(logCtx, "origin", hexutil.Encode(origin))
+	}
+	logger := log.New(logCtx...)
+
+	// The range prover says the range is correct, skip trie iteration
+	if result.valid() {
+		snapSuccessfulRangeProofMeter.Mark(1)
+		logger.Trace("Proved state range", "last", hexutil.Encode(last))
 
-	var accMarker []byte
+		// The verification is passed, process each state with the given
+		// callback function. If this state represents a contract, the
+		// corresponding storage check will be performed in the callback
+		if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil {
+			return false, nil, err
+		}
+		// Only abort the iteration when both database and trie are exhausted
+		return !result.diskMore && !result.trieMore, last, nil
+	}
+	logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "err", result.proofErr)
+	snapFailedRangeProofMeter.Mark(1)
+
+	// Special case, the entire trie is missing. In the original trie scheme,
+	// all the duplicated subtries are filtered out (only one copy of the data
+	// is stored). In the snapshot model, however, all the storage tries
+	// belonging to different contracts are kept even if they are duplicated.
+	// Track this, to some extent, to remove the noise from the statistics.
+	if origin == nil && last == nil {
+		meter := snapMissallAccountMeter
+		if kind == "storage" {
+			meter = snapMissallStorageMeter
+		}
+		meter.Mark(1)
+	}
+	tr := result.tr
+	if tr == nil {
+		tr, err = trie.New(root, dl.triedb)
+		if err != nil {
+			stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker)
+			return false, nil, errMissingTrie
+		}
+	}
+	var (
+		trieMore       bool
+		iter           = trie.NewIterator(tr.NodeIterator(origin))
+		kvkeys, kvvals = result.keys, result.vals
+
+		// counters
+		count     = 0 // number of states delivered by iterator
+		created   = 0 // states created from the trie
+		updated   = 0 // states updated from the trie
+		deleted   = 0 // states not in trie, but were in snapshot
+		untouched = 0 // states already correct
+
+		// timers
+		start    = time.Now()
+		internal time.Duration
+	)
+	for iter.Next() {
+		if last != nil && bytes.Compare(iter.Key, last) > 0 {
+			trieMore = true
+			break
+		}
+		count++
+		write := true
+		created++
+		for len(kvkeys) > 0 {
+			if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 {
+				// delete the key
+				istart := time.Now()
+				if err := onState(kvkeys[0], nil, false, true); err != nil {
+					return false, nil, err
+				}
+				kvkeys = kvkeys[1:]
+				kvvals = kvvals[1:]
+				deleted++
+				internal += time.Since(istart)
+				continue
+			} else if cmp == 0 {
+				// the snapshot key can be overwritten
+				created--
+				if write = !bytes.Equal(kvvals[0], iter.Value); write {
+					updated++
+				} else {
+					untouched++
+				}
+				kvkeys = kvkeys[1:]
+				kvvals = kvvals[1:]
+			}
+			break
+		}
+		istart := time.Now()
+		if err := onState(iter.Key, iter.Value, write, false); err != nil {
+			return false, nil, err
+		}
+		internal += time.Since(istart)
+	}
+	if iter.Err != nil {
+		return false, nil, iter.Err
+	}
+	// Delete all stale snapshot states remaining
+	istart := time.Now()
+	for _, key := range kvkeys {
+		if err := onState(key, nil, false, true); err != nil {
+			return false, nil, err
+		}
+		deleted += 1
+	}
+	internal += time.Since(istart)
+
+	// Update metrics for counting trie iteration
+	if kind == "storage" {
+		snapStorageTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds())
+	} else {
+		snapAccountTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds())
+	}
+	logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last),
+		"count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted)
+
+	// If there are either more trie items, or there are more snap items
+	// (in the next segment), then we need to keep working
+	return !trieMore && !result.diskMore, last, nil
+}
+
+// generate is a background thread that iterates over the state and storage tries,
+// constructing the state snapshot. All the arguments are purely for statistics
+// gathering and logging, since the method surfs the blocks as they arrive, often
+// being restarted.
+func (dl *diskLayer) generate(stats *generatorStats) {
+	var (
+		accMarker    []byte
+		accountRange = accountCheckRange
+	)
 	if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that
-		accMarker = dl.genMarker[:common.HashLength]
+		// Always reset the initial account range to 1
+		// whenever recovering from an interruption.
+		accMarker, accountRange = dl.genMarker[:common.HashLength], 1
 	}
-	accIt := trie.NewIterator(accTrie.NodeIterator(accMarker))
-	batch := dl.diskdb.NewBatch()
+	var (
+		batch     = dl.diskdb.NewBatch()
+		logged    = time.Now()
+		accOrigin = common.CopyBytes(accMarker)
+		abort     chan *generatorStats
+	)
+	stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker)
 
-	// Iterate from the previous marker and continue generating the state snapshot
-	logged := time.Now()
-	for accIt.Next() {
-		// Retrieve the current account and flatten it into the internal format
-		accountHash := common.BytesToHash(accIt.Key)
+	checkAndFlush := func(currentLocation []byte) error {
+		select {
+		case abort = <-dl.genAbort:
+		default:
+		}
+		if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
+			// Flush out the batch no matter whether it's empty or not.
+			// It's possible that all the states were recovered and the
+			// generation indeed made progress.
+			journalProgress(batch, currentLocation, stats)
+
+			if err := batch.Write(); err != nil {
+				return err
+			}
+			batch.Reset()
+
+			dl.lock.Lock()
+			dl.genMarker = currentLocation
+			dl.lock.Unlock()
+
+			if abort != nil {
+				stats.Log("Aborting state snapshot generation", dl.root, currentLocation)
+				return errors.New("aborted")
+			}
+		}
+		if time.Since(logged) > 8*time.Second {
+			stats.Log("Generating state snapshot", dl.root, currentLocation)
+			logged = time.Now()
+		}
+		return nil
+	}
 
+	onAccount := func(key []byte, val []byte, write bool, delete bool) error {
+		var (
+			start       = time.Now()
+			accountHash = common.BytesToHash(key)
+		)
+		if delete {
+			rawdb.DeleteAccountSnapshot(batch, accountHash)
+			snapWipedAccountMeter.Mark(1)
+
+			// Ensure that any previous snapshot storage values are cleared
+			prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...)
+			keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
+			if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil {
+				return err
+			}
+			snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds())
+			return nil
+		}
+		// Retrieve the current account and flatten it into the internal format
 		var acc struct {
 			Nonce    uint64
 			Balance  *big.Int
 			Root     common.Hash
 			CodeHash []byte
 		}
-		if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil {
+		if err := rlp.DecodeBytes(val, &acc); err != nil {
 			log.Crit("Invalid account encountered during snapshot creation", "err", err)
 		}
-		data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
-
 		// If the account is not yet in-progress, write it out
 		if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) {
-			rawdb.WriteAccountSnapshot(batch, accountHash, data)
-			stats.storage += common.StorageSize(1 + common.HashLength + len(data))
+			dataLen := len(val) // Approximate size, saves us a round of RLP-encoding
+			if !write {
+				if bytes.Equal(acc.CodeHash, emptyCode[:]) {
+					dataLen -= 32
+				}
+				if acc.Root == emptyRoot {
+					dataLen -= 32
+				}
+				snapRecoveredAccountMeter.Mark(1)
+			} else {
+				data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash)
+				dataLen = len(data)
+				rawdb.WriteAccountSnapshot(batch, accountHash, data)
+				snapGeneratedAccountMeter.Mark(1)
+			}
+			stats.storage += common.StorageSize(1 + common.HashLength + dataLen)
 			stats.accounts++
 		}
 		// If we've exceeded our batch allowance or termination was requested, flush to disk
-		var abort chan *generatorStats
-		select {
-		case abort = <-dl.genAbort:
-		default:
-		}
-		if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
-			// Only write and set the marker if we actually did something useful
-			if batch.ValueSize() > 0 {
-				// Ensure the generator entry is in sync with the data
-				marker := accountHash[:]
-				journalProgress(batch, marker, stats)
-
-				batch.Write()
-				batch.Reset()
-
-				dl.lock.Lock()
-				dl.genMarker = marker
-				dl.lock.Unlock()
-			}
-			if abort != nil {
-				stats.Log("Aborting state snapshot generation", dl.root, accountHash[:])
-				abort <- stats
-				return
-			}
+		if err := checkAndFlush(accountHash[:]); err != nil {
+			return err
 		}
-		// If the account is in-progress, continue where we left off (otherwise iterate all)
-		if acc.Root != emptyRoot {
-			storeTrie, err := trie.NewSecure(acc.Root, dl.triedb)
-			if err != nil {
-				log.Error("Generator failed to access storage trie", "root", dl.root, "account", accountHash, "stroot", acc.Root, "err", err)
-				abort := <-dl.genAbort
-				abort <- stats
-				return
+		// If the iterated account is a contract, create a further loop to
+		// verify or regenerate the contract storage.
+		if acc.Root == emptyRoot {
+			// If the root is empty, we still need to ensure that any previous snapshot
+			// storage values are cleared
+			// TODO: investigate if this can be avoided, this will be very costly since it
+			// affects every single EOA account
+			//  - Perhaps we can avoid this where codeHash is emptyCode
+			prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...)
+			keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
+			if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil {
+				return err
 			}
+			snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds())
+		} else {
+			snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds())
+
 			var storeMarker []byte
 			if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength {
 				storeMarker = dl.genMarker[common.HashLength:]
 			}
-			storeIt := trie.NewIterator(storeTrie.NodeIterator(storeMarker))
-			for storeIt.Next() {
-				rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value)
-				stats.storage += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value))
+			onStorage := func(key []byte, val []byte, write bool, delete bool) error {
+				defer func(start time.Time) {
+					snapStorageWriteCounter.Inc(time.Since(start).Nanoseconds())
+				}(time.Now())
+
+				if delete {
+					rawdb.DeleteStorageSnapshot(batch, accountHash, common.BytesToHash(key))
+					snapWipedStorageMeter.Mark(1)
+					return nil
+				}
+				if write {
+					rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val)
+					snapGeneratedStorageMeter.Mark(1)
+				} else {
+					snapRecoveredStorageMeter.Mark(1)
+				}
+				stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val))
 				stats.slots++
 
 				// If we've exceeded our batch allowance or termination was requested, flush to disk
-				var abort chan *generatorStats
-				select {
-				case abort = <-dl.genAbort:
-				default:
-				}
-				if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil {
-					// Only write and set the marker if we actually did something useful
-					if batch.ValueSize() > 0 {
-						// Ensure the generator entry is in sync with the data
-						marker := append(accountHash[:], storeIt.Key...)
-						journalProgress(batch, marker, stats)
-
-						batch.Write()
-						batch.Reset()
-
-						dl.lock.Lock()
-						dl.genMarker = marker
-						dl.lock.Unlock()
-					}
-					if abort != nil {
-						stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...))
-						abort <- stats
-						return
-					}
-					if time.Since(logged) > 8*time.Second {
-						stats.Log("Generating state snapshot", dl.root, append(accountHash[:], storeIt.Key...))
-						logged = time.Now()
-					}
+				if err := checkAndFlush(append(accountHash[:], key...)); err != nil {
+					return err
 				}
+				return nil
 			}
-			if err := storeIt.Err; err != nil {
-				log.Error("Generator failed to iterate storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err)
-				abort := <-dl.genAbort
-				abort <- stats
-				return
+			var storeOrigin = common.CopyBytes(storeMarker)
+			for {
+				exhausted, last, err := dl.generateRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil)
+				if err != nil {
+					return err
+				}
+				if exhausted {
+					break
+				}
+				if storeOrigin = increaseKey(last); storeOrigin == nil {
+					break // special case, the last is 0xffffffff...fff
+				}
 			}
 		}
-		if time.Since(logged) > 8*time.Second {
-			stats.Log("Generating state snapshot", dl.root, accIt.Key)
-			logged = time.Now()
-		}
 		// Some account processed, unmark the marker
 		accMarker = nil
+		return nil
 	}
-	if err := accIt.Err; err != nil {
-		log.Error("Generator failed to iterate account trie", "root", dl.root, "err", err)
-		abort := <-dl.genAbort
-		abort <- stats
-		return
+
+	// Global loop for regenerating the entire state trie + all layered storage tries.
+	for {
+		exhausted, last, err := dl.generateRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP)
+		// The procedure is aborted, either by an external signal or an internal error
+		if err != nil {
+			if abort == nil { // aborted by an internal error, wait for the signal
+				abort = <-dl.genAbort
+			}
+			abort <- stats
+			return
+		}
+		// Abort the procedure if the entire snapshot is generated
+		if exhausted {
+			break
+		}
+		if accOrigin = increaseKey(last); accOrigin == nil {
+			break // special case, the last is 0xffffffff...fff
+		}
+		accountRange = accountCheckRange
 	}
 	// Snapshot fully generated, set the marker to nil.
 	// Note even there is nothing to commit, persist the
 	// generator anyway to mark the snapshot is complete.
 	journalProgress(batch, nil, stats)
-	batch.Write()
+	if err := batch.Write(); err != nil {
+		log.Error("Failed to flush batch", "err", err)
+
+		abort = <-dl.genAbort
+		abort <- stats
+		return
+	}
+	batch.Reset()
 
 	log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots,
 		"storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start)))
@@ -332,6 +737,18 @@ func (dl *diskLayer) generate(stats *generatorStats) {
 	dl.lock.Unlock()
 
 	// Someone will be looking for us, wait it out
-	abort := <-dl.genAbort
+	abort = <-dl.genAbort
 	abort <- nil
 }
+
+// increaseKey increases the input key by one. It returns nil if the entire
+// addition operation overflows.
+func increaseKey(key []byte) []byte {
+	for i := len(key) - 1; i >= 0; i-- {
+		key[i]++
+		if key[i] != 0x0 {
+			return key
+		}
+	}
+	return nil
+}
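
For reference, the overflow semantics of increaseKey above determine when the outer generation loops stop advancing: a nil return means the previous range ended at the maximum possible key, so nothing is left to scan. The helper also mutates its argument in place. A tiny illustrative snippet (not part of the commit, assuming the function body above):

fmt.Printf("%x\n", increaseKey([]byte{0x00, 0xfe})) // 00ff
fmt.Printf("%x\n", increaseKey([]byte{0x00, 0xff})) // 0100, the carry propagates
fmt.Println(increaseKey([]byte{0xff, 0xff}) == nil) // true: overflow, the key space is exhausted
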

+ 646 - 3
core/state/snapshot/generate_test.go

@@ -17,16 +17,361 @@
 package snapshot
 
 import (
+	"fmt"
 	"math/big"
+	"os"
 	"testing"
 	"time"
 
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/ethdb/memorydb"
+	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
+	"golang.org/x/crypto/sha3"
 )
 
+// Tests that snapshot generation works from an empty database.
+func TestGeneration(t *testing.T) {
+	// We can't use statedb to make a test trie (circular dependency), so make
+	// a fake one manually. We're going with a small account trie of 3 accounts,
+	// two of which also have the same 3-slot storage trie attached.
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+	)
+	stTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0
+	stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371
+	stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78
+	stTrie.Commit(nil)                              // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
+
+	accTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ := rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
+
+	acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ = rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
+
+	acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ = rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2
+	root, _ := accTrie.Commit(nil)       // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd
+	triedb.Commit(root, false, nil)
+
+	if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want {
+		t.Fatalf("have %#x want %#x", have, want)
+	}
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+func hashData(input []byte) common.Hash {
+	var hasher = sha3.NewLegacyKeccak256()
+	var hash common.Hash
+	hasher.Reset()
+	hasher.Write(input)
+	hasher.Sum(hash[:0])
+	return hash
+}
+
+// Tests that snapshot generation works with existing flat state.
+func TestGenerateExistentState(t *testing.T) {
+	// We can't use statedb to make a test trie (circular dependency), so make
+	// a fake one manually. We're going with a small account trie of 3 accounts,
+	// two of which also have the same 3-slot storage trie attached.
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+	)
+	stTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0
+	stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371
+	stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78
+	stTrie.Commit(nil)                              // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
+
+	accTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ := rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
+	rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val)
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1"))
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2"))
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3"))
+
+	acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ = rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
+	diskdb.Put(hashData([]byte("acc-2")).Bytes(), val)
+	rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val)
+
+	acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+	val, _ = rlp.EncodeToBytes(acc)
+	accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2
+	rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val)
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1"))
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2"))
+	rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3"))
+
+	root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd
+	triedb.Commit(root, false, nil)
+
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) {
+	t.Helper()
+	accIt := snap.AccountIterator(common.Hash{})
+	defer accIt.Release()
+	snapRoot, err := generateTrieRoot(nil, accIt, common.Hash{}, stackTrieGenerate,
+		func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
+			storageIt, _ := snap.StorageIterator(accountHash, common.Hash{})
+			defer storageIt.Release()
+
+			hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
+			if err != nil {
+				return common.Hash{}, err
+			}
+			return hash, nil
+		}, newGenerateStats(), true)
+
+	if err != nil {
+		t.Fatal(err)
+	}
+	if snapRoot != trieRoot {
+		t.Fatalf("snaproot: %#x != trieroot #%x", snapRoot, trieRoot)
+	}
+}
+
+type testHelper struct {
+	diskdb  *memorydb.Database
+	triedb  *trie.Database
+	accTrie *trie.SecureTrie
+}
+
+func newHelper() *testHelper {
+	diskdb := memorydb.New()
+	triedb := trie.NewDatabase(diskdb)
+	accTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	return &testHelper{
+		diskdb:  diskdb,
+		triedb:  triedb,
+		accTrie: accTrie,
+	}
+}
+
+func (t *testHelper) addTrieAccount(acckey string, acc *Account) {
+	val, _ := rlp.EncodeToBytes(acc)
+	t.accTrie.Update([]byte(acckey), val)
+}
+
+func (t *testHelper) addSnapAccount(acckey string, acc *Account) {
+	val, _ := rlp.EncodeToBytes(acc)
+	key := hashData([]byte(acckey))
+	rawdb.WriteAccountSnapshot(t.diskdb, key, val)
+}
+
+func (t *testHelper) addAccount(acckey string, acc *Account) {
+	t.addTrieAccount(acckey, acc)
+	t.addSnapAccount(acckey, acc)
+}
+
+func (t *testHelper) addSnapStorage(accKey string, keys []string, vals []string) {
+	accHash := hashData([]byte(accKey))
+	for i, key := range keys {
+		rawdb.WriteStorageSnapshot(t.diskdb, accHash, hashData([]byte(key)), []byte(vals[i]))
+	}
+}
+
+func (t *testHelper) makeStorageTrie(keys []string, vals []string) []byte {
+	stTrie, _ := trie.NewSecure(common.Hash{}, t.triedb)
+	for i, k := range keys {
+		stTrie.Update([]byte(k), []byte(vals[i]))
+	}
+	root, _ := stTrie.Commit(nil)
+	return root.Bytes()
+}
+
+func (t *testHelper) Generate() (common.Hash, *diskLayer) {
+	root, _ := t.accTrie.Commit(nil)
+	t.triedb.Commit(root, false, nil)
+	snap := generateSnapshot(t.diskdb, t.triedb, 16, root)
+	return root, snap
+}
+
+// Tests that snapshot generation works with existing flat state, where the flat state
+// contains some errors:
+// - the contract with empty storage root but has storage entries in the disk
+// - the contract with non empty storage root but empty storage slots
+// - the contract(non-empty storage) misses some storage slots
+//   - miss in the beginning
+//   - miss in the middle
+//   - miss in the end
+// - the contract(non-empty storage) has wrong storage slots
+//   - wrong slots in the beginning
+//   - wrong slots in the middle
+//   - wrong slots in the end
+// - the contract(non-empty storage) has extra storage slots
+//   - extra slots in the beginning
+//   - extra slots in the middle
+//   - extra slots in the end
+func TestGenerateExistentStateWithWrongStorage(t *testing.T) {
+	helper := newHelper()
+	stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
+
+	// Account one, empty root but non-empty database
+	helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()})
+	helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
+
+	// Account two, non empty root but empty database
+	helper.addAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+
+	// Miss slots
+	{
+		// Account three, non empty root but misses slots in the beginning
+		helper.addAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"})
+
+		// Account four, non empty root but misses slots in the middle
+		helper.addAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"})
+
+		// Account five, non empty root but misses slots in the end
+		helper.addAccount("acc-5", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"})
+	}
+
+	// Wrong storage slots
+	{
+		// Account six, non empty root but wrong slots in the beginning
+		helper.addAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"})
+
+		// Account seven, non empty root but wrong slots in the middle
+		helper.addAccount("acc-7", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "badval-2", "val-3"})
+
+		// Account eight, non empty root but wrong slots in the end
+		helper.addAccount("acc-8", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"})
+
+		// Account 9, non empty root but rotated slots
+		helper.addAccount("acc-9", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"})
+	}
+
+	// Extra storage slots
+	{
+		// Account 10, non empty root but extra slots in the beginning
+		helper.addAccount("acc-10", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"})
+
+		// Account 11, non empty root but extra slots in the middle
+		helper.addAccount("acc-11", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"})
+
+		// Account 12, non empty root but extra slots in the end
+		helper.addAccount("acc-12", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"})
+	}
+
+	root, snap := helper.Generate()
+	t.Logf("Root: %#x\n", root) // Root = 0x8746cce9fd9c658b2cfd639878ed6584b7a2b3e73bb40f607fcfa156002429a0
+
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+// Tests that snapshot generation works with existing flat state, where the flat state
+// contains some errors:
+// - miss accounts
+// - wrong accounts
+// - extra accounts
+func TestGenerateExistentStateWithWrongAccounts(t *testing.T) {
+	helper := newHelper()
+	stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
+
+	// Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6]
+	// Extra accounts [acc-0, acc-5, acc-7]
+
+	// Missing accounts, only in the trie
+	{
+		helper.addTrieAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Beginning
+		helper.addTrieAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Middle
+		helper.addTrieAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // End
+	}
+
+	// Wrong accounts
+	{
+		helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: common.Hex2Bytes("0x1234")})
+
+		helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		helper.addSnapAccount("acc-3", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()})
+	}
+
+	// Extra accounts, only in the snap
+	{
+		helper.addSnapAccount("acc-0", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyRoot.Bytes()})                     // before the beginning
+		helper.addSnapAccount("acc-5", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: common.Hex2Bytes("0x1234")}) // Middle
+		helper.addSnapAccount("acc-7", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyRoot.Bytes()})          // after the end
+	}
+
+	root, snap := helper.Generate()
+	t.Logf("Root: %#x\n", root) // Root = 0x825891472281463511e7ebcc7f109e4f9200c20fa384754e11fd605cd98464e8
+
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
 // Tests that snapshot generation errors out correctly in case of a missing trie
 // node in the account trie.
 func TestGenerateCorruptAccountTrie(t *testing.T) {
@@ -55,7 +400,7 @@ func TestGenerateCorruptAccountTrie(t *testing.T) {
 	triedb.Commit(common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), false, nil)
 	diskdb.Delete(common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7").Bytes())
 
-	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), nil)
+	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"))
 	select {
 	case <-snap.genPending:
 		// Snapshot generation succeeded
@@ -115,7 +460,7 @@ func TestGenerateMissingStorageTrie(t *testing.T) {
 	// Delete a storage trie root and ensure the generator chokes
 	diskdb.Delete(common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67").Bytes())
 
-	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil)
+	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"))
 	select {
 	case <-snap.genPending:
 		// Snapshot generation succeeded
@@ -174,7 +519,7 @@ func TestGenerateCorruptStorageTrie(t *testing.T) {
 	// Delete a storage trie leaf and ensure the generator chokes
 	diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes())
 
-	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil)
+	snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"))
 	select {
 	case <-snap.genPending:
 		// Snapshot generation succeeded
@@ -188,3 +533,301 @@ func TestGenerateCorruptStorageTrie(t *testing.T) {
 	snap.genAbort <- stop
 	<-stop
 }
+
+func getStorageTrie(n int, triedb *trie.Database) *trie.SecureTrie {
+	stTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	for i := 0; i < n; i++ {
+		k := fmt.Sprintf("key-%d", i)
+		v := fmt.Sprintf("val-%d", i)
+		stTrie.Update([]byte(k), []byte(v))
+	}
+	stTrie.Commit(nil)
+	return stTrie
+}
+
+// Tests snapshot generation when an extra account with storage exists in the snap state.
+func TestGenerateWithExtraAccounts(t *testing.T) {
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+		stTrie = getStorageTrie(5, triedb)
+	)
+	accTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	{ // Account one in the trie
+		acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+		val, _ := rlp.EncodeToBytes(acc)
+		accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
+		// Identical in the snap
+		key := hashData([]byte("acc-1"))
+		rawdb.WriteAccountSnapshot(diskdb, key, val)
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-4")), []byte("val-4"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-5")), []byte("val-5"))
+	}
+	{ // Account two exists only in the snapshot
+		acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+		val, _ := rlp.EncodeToBytes(acc)
+		key := hashData([]byte("acc-2"))
+		rawdb.WriteAccountSnapshot(diskdb, key, val)
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-1")), []byte("b-val-1"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-2")), []byte("b-val-2"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-3")), []byte("b-val-3"))
+	}
+	root, _ := accTrie.Commit(nil)
+	t.Logf("root: %x", root)
+	triedb.Commit(root, false, nil)
+	// To verify the test: If we now inspect the snap db, there should exist extraneous storage items
+	if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data == nil {
+		t.Fatalf("expected snap storage to exist")
+	}
+
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+	// If we now inspect the snap db, there should exist no extraneous storage items
+	if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil {
+		t.Fatalf("expected slot to be removed, got %v", string(data))
+	}
+}
+
+func enableLogging() {
+	log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))
+}
+
+// Tests snapshot generation when many extra accounts exist only in the snap state.
+func TestGenerateWithManyExtraAccounts(t *testing.T) {
+	if false {
+		enableLogging()
+	}
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+		stTrie = getStorageTrie(3, triedb)
+	)
+	accTrie, _ := trie.NewSecure(common.Hash{}, triedb)
+	{ // Account one in the trie
+		acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+		val, _ := rlp.EncodeToBytes(acc)
+		accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
+		// Identical in the snap
+		key := hashData([]byte("acc-1"))
+		rawdb.WriteAccountSnapshot(diskdb, key, val)
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2"))
+		rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3"))
+	}
+	{ // 1000 accounts exist only in the snapshot
+		for i := 0; i < 1000; i++ {
+			//acc := &Account{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()}
+			acc := &Account{Balance: big.NewInt(int64(i)), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}
+			val, _ := rlp.EncodeToBytes(acc)
+			key := hashData([]byte(fmt.Sprintf("acc-%d", i)))
+			rawdb.WriteAccountSnapshot(diskdb, key, val)
+		}
+	}
+	root, _ := accTrie.Commit(nil)
+	t.Logf("root: %x", root)
+	triedb.Commit(root, false, nil)
+
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+// Tests this case
+// maxAccountRange 3
+// snapshot-accounts: 01, 02, 03, 04, 05, 06, 07
+// trie-accounts:             03,             07
+//
+// We iterate three snapshot accounts (max = 3) from the database. They are 0x01, 0x02, 0x03.
+// The trie has a lot of deletions.
+// So in the trie, we iterate 2 entries: 0x03, 0x07. We create 0x07 in the database and abort the procedure, because the trie is exhausted.
+// But the database still contains the stale snapshot accounts 0x04, 0x05. They have not been iterated yet, but the procedure is finished.
+func TestGenerateWithExtraBeforeAndAfter(t *testing.T) {
+	accountCheckRange = 3
+	if false {
+		enableLogging()
+	}
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+	)
+	accTrie, _ := trie.New(common.Hash{}, triedb)
+	{
+		acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}
+		val, _ := rlp.EncodeToBytes(acc)
+		accTrie.Update(common.HexToHash("0x03").Bytes(), val)
+		accTrie.Update(common.HexToHash("0x07").Bytes(), val)
+
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x01"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x06"), val)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x07"), val)
+	}
+
+	root, _ := accTrie.Commit(nil)
+	t.Logf("root: %x", root)
+	triedb.Commit(root, false, nil)
+
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+// TestGenerateWithMalformedSnapdata tests what happens if we have some junk
+// in the snapshot database which cannot be parsed back to an account.
+func TestGenerateWithMalformedSnapdata(t *testing.T) {
+	accountCheckRange = 3
+	if false {
+		enableLogging()
+	}
+	var (
+		diskdb = memorydb.New()
+		triedb = trie.NewDatabase(diskdb)
+	)
+	accTrie, _ := trie.New(common.Hash{}, triedb)
+	{
+		acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}
+		val, _ := rlp.EncodeToBytes(acc)
+		accTrie.Update(common.HexToHash("0x03").Bytes(), val)
+
+		junk := make([]byte, 100)
+		copy(junk, []byte{0xde, 0xad})
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), junk)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), junk)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), junk)
+		rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), junk)
+	}
+
+	root, _ := accTrie.Commit(nil)
+	t.Logf("root: %x", root)
+	triedb.Commit(root, false, nil)
+
+	snap := generateSnapshot(diskdb, triedb, 16, root)
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+	// If we now inspect the snap db, there should exist no extraneous storage items
+	if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil {
+		t.Fatalf("expected slot to be removed, got %v", string(data))
+	}
+}
+
+func TestGenerateFromEmptySnap(t *testing.T) {
+	//enableLogging()
+	accountCheckRange = 10
+	storageCheckRange = 20
+	helper := newHelper()
+	stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
+	// Add 400 accounts to the trie
+	for i := 0; i < 400; i++ {
+		helper.addTrieAccount(fmt.Sprintf("acc-%d", i),
+			&Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()})
+	}
+	root, snap := helper.Generate()
+	t.Logf("Root: %#x\n", root) // Root: 0x6f7af6d2e1a1bf2b84a3beb3f8b64388465fbc1e274ca5d5d3fc787ca78f59e4
+
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(1 * time.Second):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}
+
+// Tests snapshot generation with existent flat state, where the flat state
+// storage is correct but incomplete.
+// The incomplete part is in the second range:
+// snap: [ 0x01, 0x02, 0x03, 0x04] , [ 0x05, 0x06, 0x07, {missing}] (with storageCheckRange = 4)
+// trie:  0x01, 0x02, 0x03, 0x04,  0x05, 0x06, 0x07, 0x08
+// This hits a case where the snap verification passes, but there are more elements in the trie
+// which we must also add.
+func TestGenerateWithIncompleteStorage(t *testing.T) {
+	storageCheckRange = 4
+	helper := newHelper()
+	stKeys := []string{"1", "2", "3", "4", "5", "6", "7", "8"}
+	stVals := []string{"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"}
+	stRoot := helper.makeStorageTrie(stKeys, stVals)
+	// We add 8 accounts, each one missing exactly one of the storage slots. This means
+	// we don't have to order the keys and figure out exactly which hash-key winds up
+	// on the sensitive spots at the boundaries.
+	for i := 0; i < 8; i++ {
+		accKey := fmt.Sprintf("acc-%d", i)
+		helper.addAccount(accKey, &Account{Balance: big.NewInt(int64(i)), Root: stRoot, CodeHash: emptyCode.Bytes()})
+		var moddedKeys []string
+		var moddedVals []string
+		for ii := 0; ii < 8; ii++ {
+			if ii != i {
+				moddedKeys = append(moddedKeys, stKeys[ii])
+				moddedVals = append(moddedVals, stVals[ii])
+			}
+		}
+		helper.addSnapStorage(accKey, moddedKeys, moddedVals)
+	}
+
+	root, snap := helper.Generate()
+	t.Logf("Root: %#x\n", root) // Root: 0xca73f6f05ba4ca3024ef340ef3dfca8fdabc1b677ff13f5a9571fd49c16e67ff
+
+	select {
+	case <-snap.genPending:
+		// Snapshot generation succeeded
+
+	case <-time.After(250 * time.Millisecond):
+		t.Errorf("Snapshot generation failed")
+	}
+	checkSnapRoot(t, snap, root)
+	// Signal abortion to the generator and wait for it to tear down
+	stop := make(chan *generatorStats)
+	snap.genAbort <- stop
+	<-stop
+}

+ 4 - 11
core/state/snapshot/journal.go

@@ -37,7 +37,10 @@ const journalVersion uint64 = 0
 
 // journalGenerator is a disk layer entry containing the generator progress marker.
 type journalGenerator struct {
-	Wiping   bool // Whether the database was in progress of being wiped
+	// Indicator of whether the database was in the process of being wiped.
+	// It's deprecated but kept here for backward compatibility.
+	Wiping bool
+
 	Done     bool // Whether the generator finished creating the snapshot
 	Marker   []byte
 	Accounts uint64
@@ -193,14 +196,6 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
 	}
 	// Everything loaded correctly, resume any suspended operations
 	if !generator.Done {
-		// If the generator was still wiping, restart one from scratch (fine for
-		// now as it's rare and the wiper deletes the stuff it touches anyway, so
-		// restarting won't incur a lot of extra database hops.
-		var wiper chan struct{}
-		if generator.Wiping {
-			log.Info("Resuming previous snapshot wipe")
-			wiper = wipeSnapshot(diskdb, false)
-		}
 		// Whether or not wiping was in progress, load any generator progress too
 		base.genMarker = generator.Marker
 		if base.genMarker == nil {
@@ -214,7 +209,6 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
 			origin = binary.BigEndian.Uint64(generator.Marker)
 		}
 		go base.generate(&generatorStats{
-			wiping:   wiper,
 			origin:   origin,
 			start:    time.Now(),
 			accounts: generator.Accounts,
@@ -381,7 +375,6 @@ func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) {
 		Marker: dl.genMarker,
 	}
 	if stats != nil {
-		entry.Wiping = (stats.wiping != nil)
 		entry.Accounts = stats.accounts
 		entry.Slots = stats.slots
 		entry.Storage = uint64(stats.storage)
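
The Wiping flag is kept even though the wiper itself is gone: the journal is RLP-encoded, and go-ethereum's RLP codec maps struct fields by position, so a journal written by an older client only decodes if the deprecated field still occupies its slot. A standalone sketch with hypothetical two-field types (not the real journalGenerator) illustrates the failure mode:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/rlp"
)

// Old journal layout: the deprecated flag occupies the first slot.
type genOld struct {
	Wiping bool
	Done   bool
}

// Hypothetical cleaned-up layout with the flag dropped.
type genNew struct {
	Done bool
}

func main() {
	blob, _ := rlp.EncodeToBytes(genOld{Wiping: false, Done: true})
	var decoded genNew
	// Fails with "too many elements": RLP matches struct fields by position,
	// not by name, so the slot left behind by Wiping cannot simply vanish.
	fmt.Println(rlp.DecodeBytes(blob, &decoded))
}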

+ 2 - 8
core/state/snapshot/snapshot.go

@@ -656,9 +656,6 @@ func (t *Tree) Rebuild(root common.Hash) {
 	// building a brand new snapshot.
 	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
 
-	// Track whether there's a wipe currently running and keep it alive if so
-	var wiper chan struct{}
-
 	// Iterate over and mark all layers stale
 	for _, layer := range t.layers {
 		switch layer := layer.(type) {
@@ -667,10 +664,7 @@ func (t *Tree) Rebuild(root common.Hash) {
 			if layer.genAbort != nil {
 				abort := make(chan *generatorStats)
 				layer.genAbort <- abort
-
-				if stats := <-abort; stats != nil {
-					wiper = stats.wiping
-				}
+				<-abort
 			}
 			// Layer should be inactive now, mark it as stale
 			layer.lock.Lock()
@@ -691,7 +685,7 @@ func (t *Tree) Rebuild(root common.Hash) {
 	// generator will run a wiper first if there's not one running right now.
 	log.Info("Rebuilding state snapshot")
 	t.layers = map[common.Hash]snapshot{
-		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
+		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root),
 	}
 }
 

+ 24 - 8
core/state/snapshot/wipe.go

@@ -24,10 +24,11 @@ import (
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/metrics"
 )
 
 // wipeSnapshot starts a goroutine to iterate over the entire key-value database
-// and delete all the  data associated with the snapshot (accounts, storage,
+// and delete all the data associated with the snapshot (accounts, storage,
 // metadata). After all is done, the snapshot range of the database is compacted
 // to free up unused data blocks.
 func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} {
@@ -53,10 +54,10 @@ func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} {
 // removed in sync to avoid data races. After all is done, the snapshot range of
 // the database is compacted to free up unused data blocks.
 func wipeContent(db ethdb.KeyValueStore) error {
-	if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil {
+	if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength, snapWipedAccountMeter, true); err != nil {
 		return err
 	}
-	if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil {
+	if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength, snapWipedStorageMeter, true); err != nil {
 		return err
 	}
 	// Compact the snapshot section of the database to get rid of unused space
@@ -82,8 +83,11 @@ func wipeContent(db ethdb.KeyValueStore) error {
 }
 
 // wipeKeyRange deletes a range of keys from the database starting with prefix
-// and having a specific total key length.
-func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int) error {
+// and having a specific total key length. The origin and limit are optional
+// and can be specified to restrict deletion to a particular key range.
+//
+// If specified, origin is included in the wipe and limit is excluded.
+func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int, meter metrics.Meter, report bool) error {
 	// Batch deletions together to avoid holding an iterator for too long
 	var (
 		batch = db.NewBatch()
@@ -92,7 +96,11 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int
 	// Iterate over the key-range and delete all of them
 	start, logged := time.Now(), time.Now()
 
-	it := db.NewIterator(prefix, nil)
+	it := db.NewIterator(prefix, origin)
+	var stop []byte
+	if limit != nil {
+		stop = append(prefix, limit...)
+	}
 	for it.Next() {
 		// Skip any keys with the correct prefix but wrong length (trie nodes)
 		key := it.Key()
@@ -102,6 +110,9 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int
 		if len(key) != keylen {
 			continue
 		}
+		if stop != nil && bytes.Compare(key, stop) >= 0 {
+			break
+		}
 		// Delete the key and periodically recreate the batch and iterator
 		batch.Delete(key)
 		items++
@@ -116,7 +127,7 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int
 			seekPos := key[len(prefix):]
 			it = db.NewIterator(prefix, seekPos)
 
-			if time.Since(logged) > 8*time.Second {
+			if time.Since(logged) > 8*time.Second && report {
 				log.Info("Deleting state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
 				logged = time.Now()
 			}
@@ -126,6 +137,11 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int
 	if err := batch.Write(); err != nil {
 		return err
 	}
-	log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+	if meter != nil {
+		meter.Mark(int64(items))
+	}
+	if report {
+		log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start)))
+	}
 	return nil
 }
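
For illustration, a hypothetical in-package call using the extended signature (origin and limit are placeholder common.Hash values, not part of this change): it deletes only the snapshot accounts in [origin, limit), counts them on the account meter, and skips progress logging.

if err := wipeKeyRange(
	diskdb, "accounts",
	rawdb.SnapshotAccountPrefix,
	origin.Bytes(), limit.Bytes(), // nil, nil would wipe the whole key space
	len(rawdb.SnapshotAccountPrefix)+common.HashLength,
	snapWipedAccountMeter,
	false, // small, bounded wipes don't need progress logs
); err != nil {
	return err
}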

+ 1 - 1
core/state/statedb.go

@@ -948,7 +948,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) {
 	// The onleaf func is called _serially_, so we can reuse the same account
 	// for unmarshalling every time.
 	var account Account
-	root, err := s.trie.Commit(func(path []byte, leaf []byte, parent common.Hash) error {
+	root, err := s.trie.Commit(func(_ [][]byte, _ []byte, leaf []byte, parent common.Hash) error {
 		if err := rlp.DecodeBytes(leaf, &account); err != nil {
 			return nil
 		}

+ 19 - 5
core/state/sync.go

@@ -26,17 +26,31 @@ import (
 )
 
 // NewStateSync create a new state trie download scheduler.
-func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom) *trie.Sync {
+func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(paths [][]byte, leaf []byte) error) *trie.Sync {
+	// Register the storage slot callback if the external callback is specified.
+	var onSlot func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error
+	if onLeaf != nil {
+		onSlot = func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error {
+			return onLeaf(paths, leaf)
+		}
+	}
+	// Register the account callback to connect the state trie and the storage
+	// trie that belongs to the contract.
 	var syncer *trie.Sync
-	callback := func(path []byte, leaf []byte, parent common.Hash) error {
+	onAccount := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error {
+		if onLeaf != nil {
+			if err := onLeaf(paths, leaf); err != nil {
+				return err
+			}
+		}
 		var obj Account
 		if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil {
 			return err
 		}
-		syncer.AddSubTrie(obj.Root, path, parent, nil)
-		syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent)
+		syncer.AddSubTrie(obj.Root, hexpath, parent, onSlot)
+		syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), hexpath, parent)
 		return nil
 	}
-	syncer = trie.NewSync(root, database, callback, bloom)
+	syncer = trie.NewSync(root, database, onAccount, bloom)
 	return syncer
 }
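
A minimal caller-side sketch of the extended constructor (root, db and the logging are illustrative; db is assumed to be an ethdb.Database such as rawdb.NewMemoryDatabase()). The snap syncer below passes its onHealState method here, while the downloader keeps passing nil to retain the old behaviour.

sched := state.NewStateSync(root, db, trie.NewSyncBloom(1, db), func(paths [][]byte, leaf []byte) error {
	switch len(paths) {
	case 1: // account leaf: paths[0] is the account hash
		log.Debug("Downloaded account leaf", "account", common.BytesToHash(paths[0]), "size", len(leaf))
	case 2: // storage leaf: paths[0] is the account hash, paths[1] the slot hash
		log.Debug("Downloaded storage leaf", "account", common.BytesToHash(paths[0]), "slot", common.BytesToHash(paths[1]))
	}
	return nil
})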

+ 6 - 6
core/state/sync_test.go

@@ -133,7 +133,7 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error {
 // Tests that an empty state is not scheduled for syncing.
 func TestEmptyStateSync(t *testing.T) {
 	empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
-	sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()))
+	sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()), nil)
 	if nodes, paths, codes := sync.Missing(1); len(nodes) != 0 || len(paths) != 0 || len(codes) != 0 {
 		t.Errorf(" content requested for empty state: %v, %v, %v", nodes, paths, codes)
 	}
@@ -170,7 +170,7 @@ func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool) {
 
 	// Create a destination state and sync with the scheduler
 	dstDb := rawdb.NewMemoryDatabase()
-	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
+	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil)
 
 	nodes, paths, codes := sched.Missing(count)
 	var (
@@ -249,7 +249,7 @@ func TestIterativeDelayedStateSync(t *testing.T) {
 
 	// Create a destination state and sync with the scheduler
 	dstDb := rawdb.NewMemoryDatabase()
-	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
+	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil)
 
 	nodes, _, codes := sched.Missing(0)
 	queue := append(append([]common.Hash{}, nodes...), codes...)
@@ -297,7 +297,7 @@ func testIterativeRandomStateSync(t *testing.T, count int) {
 
 	// Create a destination state and sync with the scheduler
 	dstDb := rawdb.NewMemoryDatabase()
-	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
+	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil)
 
 	queue := make(map[common.Hash]struct{})
 	nodes, _, codes := sched.Missing(count)
@@ -347,7 +347,7 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
 
 	// Create a destination state and sync with the scheduler
 	dstDb := rawdb.NewMemoryDatabase()
-	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
+	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil)
 
 	queue := make(map[common.Hash]struct{})
 	nodes, _, codes := sched.Missing(0)
@@ -414,7 +414,7 @@ func TestIncompleteStateSync(t *testing.T) {
 
 	// Create a destination state and sync with the scheduler
 	dstDb := rawdb.NewMemoryDatabase()
-	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb))
+	sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil)
 
 	var added []common.Hash
 

+ 1 - 1
eth/downloader/statesync.go

@@ -298,7 +298,7 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync {
 	return &stateSync{
 		d:         d,
 		root:      root,
-		sched:     state.NewStateSync(root, d.stateDB, d.stateBloom),
+		sched:     state.NewStateSync(root, d.stateDB, d.stateBloom, nil),
 		keccak:    sha3.NewLegacyKeccak256().(crypto.KeccakState),
 		trieTasks: make(map[common.Hash]*trieTask),
 		codeTasks: make(map[common.Hash]*codeTask),

+ 73 - 8
eth/protocols/snap/sync.go

@@ -29,6 +29,7 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/rawdb"
 	"github.com/ethereum/go-ethereum/core/state"
+	"github.com/ethereum/go-ethereum/core/state/snapshot"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/event"
@@ -51,7 +52,7 @@ const (
 	// maxRequestSize is the maximum number of bytes to request from a remote peer.
 	maxRequestSize = 512 * 1024
 
-	// maxStorageSetRequestCountis th maximum number of contracts to request the
+	// maxStorageSetRequestCount is the maximum number of contracts to request the
 	// storage of in a single query. If this number is too low, we're not filling
 	// responses fully and waste round trip times. If it's too high, we're capping
 	// responses and waste bandwidth.
@@ -435,9 +436,14 @@ type Syncer struct {
 	bytecodeHealDups   uint64             // Number of bytecodes already processed
 	bytecodeHealNops   uint64             // Number of bytecodes not requested
 
-	startTime time.Time   // Time instance when snapshot sync started
-	startAcc  common.Hash // Account hash where sync started from
-	logTime   time.Time   // Time instance when status was last reported
+	stateWriter        ethdb.Batch        // Shared batch writer used for persisting raw states
+	accountHealed      uint64             // Number of accounts downloaded during the healing stage
+	accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage
+	storageHealed      uint64             // Number of storage slots downloaded during the healing stage
+	storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage
+
+	startTime time.Time // Time instance when snapshot sync started
+	logTime   time.Time // Time instance when status was last reported
 
 	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
 	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
@@ -477,6 +483,7 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer {
 		bytecodeHealReqFails: make(chan *bytecodeHealRequest),
 		trienodeHealResps:    make(chan *trienodeHealResponse),
 		bytecodeHealResps:    make(chan *bytecodeHealResponse),
+		stateWriter:          db.NewBatch(),
 	}
 }
 
@@ -544,7 +551,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 	s.lock.Lock()
 	s.root = root
 	s.healer = &healTask{
-		scheduler: state.NewStateSync(root, s.db, nil),
+		scheduler: state.NewStateSync(root, s.db, nil, s.onHealState),
 		trieTasks: make(map[common.Hash]trie.SyncPath),
 		codeTasks: make(map[common.Hash]struct{}),
 	}
@@ -560,6 +567,11 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 		log.Debug("Snapshot sync already completed")
 		return nil
 	}
+	// If sync is still not finished, we need to ensure that any marker is wiped.
+	// Otherwise, it may happen that requests for e.g. genesis data are delivered
+	// from the snapshot data instead of from the trie.
+	snapshot.ClearSnapshotMarker(s.db)
+
 	defer func() { // Persist any progress, independent of failure
 		for _, task := range s.tasks {
 			s.forwardAccountTask(task)
@@ -569,6 +581,14 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
 	}()
 
 	log.Debug("Starting snapshot sync cycle", "root", root)
+
+	// Flush out the last committed raw states
+	defer func() {
+		if s.stateWriter.ValueSize() > 0 {
+			s.stateWriter.Write()
+			s.stateWriter.Reset()
+		}
+	}()
 	defer s.report(true)
 
 	// Whether sync completed or not, disregard any future packets
@@ -1694,7 +1714,7 @@ func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
 // processStorageResponse integrates an already validated storage response
 // into the account tasks.
 func (s *Syncer) processStorageResponse(res *storageResponse) {
-	// Switch the suntask from pending to idle
+	// Switch the subtask from pending to idle
 	if res.subTask != nil {
 		res.subTask.req = nil
 	}
@@ -1826,6 +1846,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) {
 			nodes++
 		}
 		it.Release()
+
+		// Persist the received storage segments. These flat states may be
+		// outdated during the sync, but they can be fixed later during the
+		// snapshot generation.
+		for j := 0; j < len(res.hashes[i]); j++ {
+			rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j])
+			bytes += common.StorageSize(1 + 2*common.HashLength + len(res.slots[i][j]))
+		}
 	}
 	if err := batch.Write(); err != nil {
 		log.Crit("Failed to persist storage slots", "err", err)
@@ -1983,6 +2011,14 @@ func (s *Syncer) forwardAccountTask(task *accountTask) {
 	}
 	it.Release()
 
+	// Persist the received account segments. These flat states may be
+	// outdated during the sync, but they can be fixed later during the
+	// snapshot generation.
+	for i, hash := range res.hashes {
+		blob := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash)
+		rawdb.WriteAccountSnapshot(batch, hash, blob)
+		bytes += common.StorageSize(1 + common.HashLength + len(blob))
+	}
 	if err := batch.Write(); err != nil {
 		log.Crit("Failed to persist accounts", "err", err)
 	}
@@ -2569,6 +2605,33 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e
 	return nil
 }
 
+// onHealState is a callback method to invoke when a flat state (account
+// or storage slot) is downloaded during the healing stage. The flat states
+// can be persisted blindly and fixed later in the generation stage.
+// Note it's not concurrency safe; concurrent access must be handled by the caller.
+func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
+	if len(paths) == 1 {
+		var account state.Account
+		if err := rlp.DecodeBytes(value, &account); err != nil {
+			return nil
+		}
+		blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash)
+		rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob)
+		s.accountHealed += 1
+		s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob))
+	}
+	if len(paths) == 2 {
+		rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
+		s.storageHealed += 1
+		s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
+	}
+	if s.stateWriter.ValueSize() > ethdb.IdealBatchSize {
+		s.stateWriter.Write() // It's fine to ignore the error here
+		s.stateWriter.Reset()
+	}
+	return nil
+}
+
 // hashSpace is the total size of the 256 bit hash space for accounts.
 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
 
@@ -2632,7 +2695,9 @@ func (s *Syncer) reportHealProgress(force bool) {
 	var (
 		trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString())
 		bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString())
+		accounts = fmt.Sprintf("%d@%v", s.accountHealed, s.accountHealedBytes.TerminalString())
+		storage  = fmt.Sprintf("%d@%v", s.storageHealed, s.storageHealedBytes.TerminalString())
 	)
-	log.Info("State heal in progress", "nodes", trienode, "codes", bytecode,
-		"pending", s.healer.scheduler.Pending())
+	log.Info("State heal in progress", "accounts", accounts, "slots", storage,
+		"codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
 }

+ 2 - 2
trie/committer.go

@@ -220,13 +220,13 @@ func (c *committer) commitLoop(db *Database) {
 			switch n := n.(type) {
 			case *shortNode:
 				if child, ok := n.Val.(valueNode); ok {
-					c.onleaf(nil, child, hash)
+					c.onleaf(nil, nil, child, hash)
 				}
 			case *fullNode:
 				// For children in range [0, 15], it's impossible
 				// to contain valuenode. Only check the 17th child.
 				if n.Children[16] != nil {
-					c.onleaf(nil, n.Children[16].(valueNode), hash)
+					c.onleaf(nil, nil, n.Children[16].(valueNode), hash)
 				}
 			}
 		}

+ 8 - 1
trie/sync.go

@@ -398,7 +398,14 @@ func (s *Sync) children(req *request, object node) ([]*request, error) {
 		// Notify any external watcher of a new key/value node
 		if req.callback != nil {
 			if node, ok := (child.node).(valueNode); ok {
-				if err := req.callback(child.path, node, req.hash); err != nil {
+				var paths [][]byte
+				if len(child.path) == 2*common.HashLength {
+					paths = append(paths, hexToKeybytes(child.path))
+				} else if len(child.path) == 4*common.HashLength {
+					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
+					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
+				}
+				if err := req.callback(paths, child.path, node, req.hash); err != nil {
 					return nil, err
 				}
 			}
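
For clarity, a hypothetical helper mirroring the path-tuple construction above (nibblesToKey and leafPaths are made-up names; the real code uses the internal hexToKeybytes, and the length checks assume the hex path carries no terminator nibble):

// nibblesToKey packs hex nibbles back into key bytes.
func nibblesToKey(hex []byte) []byte {
	key := make([]byte, len(hex)/2)
	for i := range key {
		key[i] = hex[2*i]<<4 | hex[2*i+1]
	}
	return key
}

// leafPaths derives the raw path tuple handed to the callback.
func leafPaths(hexpath []byte) [][]byte {
	switch len(hexpath) {
	case 2 * common.HashLength: // account leaf: a single 32-byte account hash
		return [][]byte{nibblesToKey(hexpath)}
	case 4 * common.HashLength: // storage leaf: account hash followed by slot hash
		return [][]byte{
			nibblesToKey(hexpath[:2*common.HashLength]),
			nibblesToKey(hexpath[2*common.HashLength:]),
		}
	default:
		return nil // other shapes get no raw path tuple
	}
}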

+ 14 - 3
trie/trie.go

@@ -37,9 +37,20 @@ var (
 )
 
 // LeafCallback is a callback type invoked when a trie operation reaches a leaf
-// node. It's used by state sync and commit to allow handling external references
-// between account and storage tries.
-type LeafCallback func(path []byte, leaf []byte, parent common.Hash) error
+// node.
+//
+// Paths is a path tuple identifying a particular trie node either in a single
+// trie (account) or a layered trie (account -> storage). Each path in the tuple
+// is in the raw format (32 bytes).
+//
+// Hexpath is a composite hexary path identifying the trie node. All the key
+// bytes are converted to hexary nibbles and composed with the parent path if
+// the trie node is in a layered trie.
+//
+// It's used by state sync and commit to allow handling external references
+// between account and storage tries. It's also used in state healing for
+// extracting the raw states (leaf nodes) with their corresponding paths.
+type LeafCallback func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error
 
 // Trie is a Merkle Patricia Trie.
 // The zero value is an empty trie with no database.

+ 1 - 1
trie/trie_test.go

@@ -569,7 +569,7 @@ func BenchmarkCommitAfterHash(b *testing.B) {
 		benchmarkCommitAfterHash(b, nil)
 	})
 	var a account
-	onleaf := func(path []byte, leaf []byte, parent common.Hash) error {
+	onleaf := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error {
 		rlp.DecodeBytes(leaf, &a)
 		return nil
 	}