snapshot.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package snapshot implements a journalled, dynamic state dump.
package snapshot

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/trie"
)

var (
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer had been invalidated due to the chain progressing forward far enough
	// to not maintain the layer's original state.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is being generated currently and the requested data item is not yet in the
	// range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if the callers want to iterate the snapshot
	// while the generation is not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
	// that forms a cycle in the snapshot tree.
	errSnapshotCycle = errors.New("snapshot cycle")
)

// Snapshot represents the functionality supported by a snapshot storage layer.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// WaitAndGetVerifyRes blocks until the snapshot has been verified and
	// returns the verification result.
	WaitAndGetVerifyRes() bool

	// Verified returns whether the snapshot is verified.
	Verified() bool

	// MarkValid stores the verification result.
	MarkValid()

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (*Account, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular hash,
	// within a particular account.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}

// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte, verified chan struct{}) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer.
	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
}

// Tree is an Ethereum state snapshot tree. It consists of one persistent base
// layer backed by a key-value store, on top of which arbitrarily many in-memory
// diff layers are topped. The memory diffs can form a tree with branching, but
// the disk layer is singleton and common to all. If a reorg goes deeper than the
// disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	diskdb   ethdb.KeyValueStore      // Persistent database to store the snapshot
	triedb   *trie.Database           // In-memory cache to access the trie through
	cache    int                      // Megabytes permitted to use for read caches
	layers   map[common.Hash]snapshot // Collection of all known layers
	lock     sync.RWMutex
	capLimit int
}
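
// Hypothetical read-path sketch (not part of the original file): given a Tree
// `t` and the state root of a block whose snapshot layer is still retained,
// account and storage reads bypass the trie entirely. The root and hash names
// below are placeholders.
//
//	if layer := t.Snapshot(blockRoot); layer != nil {
//		acc, err := layer.Account(accountHash)             // slim-format account, or nil if absent
//		slot, serr := layer.Storage(accountHash, slotHash) // raw slot data, or nil
//		_, _, _, _ = acc, err, slot, serr
//	}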

// New attempts to load an already existing snapshot from a persistent key-value
// store (with a number of memory layers from a journal), ensuring that the head
// of the snapshot matches the expected one.
//
// If the snapshot is missing or the disk layer is broken, the entire snapshot
// is deleted and will be reconstructed from scratch based on the tries in the
// key-value store, on a background thread. If the memory layers in the journal
// are not continuous with the disk layer or the journal is missing, all diffs
// will be discarded iff it's in "recovery" mode, otherwise rebuild is mandatory.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache, cap int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) {
	// Create a new, empty snapshot tree
	snap := &Tree{
		diskdb:   diskdb,
		triedb:   triedb,
		cache:    cache,
		capLimit: cap,
		layers:   make(map[common.Hash]snapshot),
	}
	if !async {
		defer snap.waitBuild()
	}
	// Attempt to load a previously persisted snapshot and rebuild one if failed
	head, disabled, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
	if disabled {
		log.Warn("Snapshot maintenance disabled (syncing)")
		return snap, nil
	}
	if err != nil {
		if rebuild {
			log.Warn("Failed to load snapshot, regenerating", "err", err)
			snap.Rebuild(root)
			return snap, nil
		}
		return nil, err // Bail out the error, don't rebuild automatically.
	}
	// Existing snapshot loaded, seed all the layers
	for head != nil {
		snap.layers[head.Root()] = head
		head = head.Parent()
	}
	log.Info("Snapshot loaded", "diskRoot", snap.diskRoot(), "root", root)
	return snap, nil
}
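
// Hypothetical construction sketch (not part of the original file): wiring a
// Tree on top of an in-memory database. The 256 MB cache, 128 layer cap and
// the boolean flags are illustrative values only; rawdb.NewMemoryDatabase and
// trie.NewDatabase are the standard go-ethereum helpers.
//
//	diskdb := rawdb.NewMemoryDatabase()
//	triedb := trie.NewDatabase(diskdb)
//	tree, err := New(diskdb, triedb, 256, 128, headRoot, false /* async */, true /* rebuild */, false /* recovery */)
//	if err != nil {
//		// no usable journal and rebuilding disabled: the caller decides how to recover
//	}
//	_ = tree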

// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
// to be used by tests to ensure we're testing what we believe we are.
func (t *Tree) waitBuild() {
	// Find the rebuild termination channel
	var done chan struct{}

	t.lock.RLock()
	for _, layer := range t.layers {
		if layer, ok := layer.(*diskLayer); ok {
			done = layer.genPending
			break
		}
	}
	t.lock.RUnlock()

	// Wait until the snapshot is generated
	if done != nil {
		<-done
	}
}

// Disable interrupts any pending snapshot generator, deletes all the snapshot
// layers in memory and marks snapshots disabled globally. In order to resume
// the snapshot functionality, the caller must invoke Rebuild.
func (t *Tree) Disable() {
	// Interrupt any live snapshot layers
	t.lock.Lock()
	defer t.lock.Unlock()

	for _, layer := range t.layers {
		switch layer := layer.(type) {
		case *diskLayer:
			// If the base layer is generating, abort it
			if layer.genAbort != nil {
				abort := make(chan *generatorStats)
				layer.genAbort <- abort
				<-abort
			}
			// Layer should be inactive now, mark it as stale
			layer.lock.Lock()
			layer.stale = true
			layer.lock.Unlock()

		case *diffLayer:
			// If the layer is a simple diff, simply mark as stale
			layer.lock.Lock()
			atomic.StoreUint32(&layer.stale, 1)
			layer.lock.Unlock()

		default:
			panic(fmt.Sprintf("unknown layer type: %T", layer))
		}
	}
	t.layers = map[common.Hash]snapshot{}

	// Delete all snapshot liveness information from the database
	batch := t.diskdb.NewBatch()

	rawdb.WriteSnapshotDisabled(batch)
	rawdb.DeleteSnapshotRoot(batch)
	rawdb.DeleteSnapshotJournal(batch)
	rawdb.DeleteSnapshotGenerator(batch)
	rawdb.DeleteSnapshotRecoveryNumber(batch)
	// Note, we don't delete the sync progress
	if err := batch.Write(); err != nil {
		log.Crit("Failed to disable snapshots", "err", err)
	}
}
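
// Hypothetical sketch (not part of the original file): a caller that switches
// to state sync would disable snapshot maintenance and later resume it once a
// pivot state is available; `pivotRoot` is a placeholder.
//
//	t.Disable()          // abort generation, drop layers, persist the disabled flag
//	// ... sync the state of a new pivot block ...
//	t.Rebuild(pivotRoot) // re-enable snapshots and regenerate from the pivot trie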

// Snapshot retrieves a snapshot belonging to the given block root, or nil if no
// snapshot is maintained for that block.
func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.layers[blockRoot]
}

// Snapshots returns all visited layers from the topmost layer with the specific
// root, traversing downward. The number of returned layers is capped by the
// given limit. If nodisk is set, the disk layer is excluded.
func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
	t.lock.RLock()
	defer t.lock.RUnlock()

	if limits == 0 {
		return nil
	}
	layer := t.layers[root]
	if layer == nil {
		return nil
	}
	var ret []Snapshot
	for {
		if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
			break
		}
		ret = append(ret, layer)
		limits -= 1
		if limits == 0 {
			break
		}
		parent := layer.Parent()
		if parent == nil {
			break
		}
		layer = parent
	}
	return ret
}
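
// Hypothetical sketch (not part of the original file): collecting up to 128
// in-memory diff layers above a head root while skipping the disk layer.
//
//	for _, layer := range t.Snapshots(headRoot, 128, true /* nodisk */) {
//		// layers are ordered top-down, the layer for headRoot first
//		_ = layer.Root()
//	}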

// Update adds a new snapshot into the tree, if that can be linked to an existing
// old parent. The given change sets are keyed by address; they are hashed here
// before being handed to the hash-keyed internal update.
func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Address]struct{}, accounts map[common.Address][]byte, storage map[common.Address]map[string][]byte, verified chan struct{}) error {
	hashDestructs, hashAccounts, hashStorage := transformSnapData(destructs, accounts, storage)
	return t.update(blockRoot, parentRoot, hashDestructs, hashAccounts, hashStorage, verified)
}

// update adds a new snapshot into the tree, if that can be linked to an existing
// old parent. It is disallowed to insert a disk layer (the origin of all).
func (t *Tree) update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte, verified chan struct{}) error {
	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
	// special case that can only happen for Clique networks where empty blocks
	// don't modify the state (0 block subsidy).
	//
	// Although we could silently ignore this internally, it should be the caller's
	// responsibility to avoid even attempting to insert such a snapshot.
	if blockRoot == parentRoot {
		return errSnapshotCycle
	}
	// Generate a new snapshot on top of the parent
	parent := t.Snapshot(parentRoot)
	if parent == nil {
		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
	}
	snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage, verified)

	// Save the new snapshot for later
	t.lock.Lock()
	defer t.lock.Unlock()

	t.layers[snap.root] = snap
	log.Debug("Snapshot updated", "blockRoot", blockRoot)
	return nil
}
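
// Hypothetical post-block sketch (not part of the original file): after
// executing a block, the state writer hands the address-keyed change sets to
// Update, which hashes them and links a new diff layer onto the parent layer.
// All names below (selfdestructed, touched, slimAccountRLP, slotKey, slotValue)
// are placeholders, and the nil `verified` channel mirrors the case where no
// asynchronous verification is used.
//
//	destructs := map[common.Address]struct{}{selfdestructed: {}}
//	accounts := map[common.Address][]byte{touched: slimAccountRLP}
//	storage := map[common.Address]map[string][]byte{touched: {string(slotKey[:]): slotValue}}
//	if err := t.Update(blockRoot, parentRoot, destructs, accounts, storage, nil); err != nil {
//		// parent layer missing, or blockRoot == parentRoot
//	}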

// CapLimit returns the configured number of diff layers to retain when capping.
func (t *Tree) CapLimit() int {
	return t.capLimit
}

// Cap traverses downwards the snapshot tree from a head block hash until the
// number of allowed layers is crossed. All layers beyond the permitted number
// are flattened downwards.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) Cap(root common.Hash, layers int) error {
	// Retrieve the head snapshot to cap from
	snap := t.Snapshot(root)
	if snap == nil {
		return fmt.Errorf("snapshot [%#x] missing", root)
	}
	diff, ok := snap.(*diffLayer)
	if !ok {
		return fmt.Errorf("snapshot [%#x] is disk layer", root)
	}
	// If the generator is still running, use a more aggressive cap
	diff.origin.lock.RLock()
	if diff.origin.genMarker != nil && layers > 8 {
		layers = 8
	}
	diff.origin.lock.RUnlock()

	// Run the internal capping and discard all stale layers
	t.lock.Lock()
	defer t.lock.Unlock()

	// Flattening the bottom-most diff layer requires special casing since there's
	// no child to rewire to the grandparent. In that case we can fake a temporary
	// child for the capping and then remove it.
	if layers == 0 {
		// If full commit was requested, flatten the diffs and merge onto disk
		diff.lock.RLock()
		base := diffToDisk(diff.flatten().(*diffLayer))
		diff.lock.RUnlock()

		// Replace the entire snapshot tree with the flat base
		t.layers = map[common.Hash]snapshot{base.root: base}
		return nil
	}
	persisted := t.cap(diff, layers)

	// Remove any layer that is stale or links into a stale layer
	children := make(map[common.Hash][]common.Hash)
	for root, snap := range t.layers {
		if diff, ok := snap.(*diffLayer); ok {
			parent := diff.parent.Root()
			children[parent] = append(children[parent], root)
		}
	}
	var remove func(root common.Hash)
	remove = func(root common.Hash) {
		delete(t.layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	for root, snap := range t.layers {
		if snap.Stale() {
			remove(root)
		}
	}
	// If the disk layer was modified, regenerate all the cumulative blooms
	if persisted != nil {
		var rebloom func(root common.Hash)
		rebloom = func(root common.Hash) {
			if diff, ok := t.layers[root].(*diffLayer); ok {
				diff.rebloom(persisted)
			}
			for _, child := range children[root] {
				rebloom(child)
			}
		}
		rebloom(persisted.root)
	}
	log.Debug("Snapshot capped", "root", root)
	return nil
}
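
// Hypothetical sketch (not part of the original file): a typical caller caps
// the tree after every Update so that at most t.CapLimit() diff layers (plus
// the bottom-most accumulator, per the note above) stay in memory.
//
//	if err := t.Cap(blockRoot, t.CapLimit()); err != nil {
//		// blockRoot is unknown, or it already refers to the disk layer
//	}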

// cap traverses downwards the diff tree until the number of allowed layers is
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
	// Dive until we run out of layers or reach the persistent database
	for i := 0; i < layers-1; i++ {
		// If we still have diff layers below, continue down
		if parent, ok := diff.parent.(*diffLayer); ok {
			diff = parent
		} else {
			// Diff stack too shallow, return without modifications
			return nil
		}
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := diff.parent.(type) {
	case *diskLayer:
		return nil

	case *diffLayer:
		// Flatten the parent into the grandparent. The flattening internally obtains a
		// write lock on grandparent.
		flattened := parent.flatten().(*diffLayer)
		t.layers[flattened.root] = flattened

		diff.lock.Lock()
		defer diff.lock.Unlock()

		diff.parent = flattened
		if flattened.memory < aggregatorMemoryLimit {
			// Accumulator layer is smaller than the limit, so we can abort, unless
			// there's a snapshot being generated currently. In that case, the trie
			// will move from underneath the generator so we **must** merge all the
			// partial data down into the snapshot and restart the generation.
			if flattened.parent.(*diskLayer).genAbort == nil {
				return nil
			}
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottom-most layer is larger than our memory cap, persist to disk
	bottom := diff.parent.(*diffLayer)

	bottom.lock.RLock()
	base := diffToDisk(bottom)
	bottom.lock.RUnlock()

	t.layers[base.root] = base
	diff.parent = base
	return base
}

// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called onto a non-bottom-most diff layer.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished.
func diffToDisk(bottom *diffLayer) *diskLayer {
	var (
		base  = bottom.parent.(*diskLayer)
		batch = base.diskdb.NewBatch()
		stats *generatorStats
	)
	// If the disk layer is running a snapshot generator, abort it
	if base.genAbort != nil {
		abort := make(chan *generatorStats)
		base.genAbort <- abort
		stats = <-abort
	}
	// Put the deletion in the batch writer, flush all updates in the final step.
	rawdb.DeleteSnapshotRoot(batch)

	// Mark the original base as stale as we're going to create a new wrapper
	base.lock.Lock()
	if base.stale {
		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
	}
	base.stale = true
	base.lock.Unlock()

	// Destroy all the destructed accounts from the database
	for hash := range bottom.destructSet {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Remove all storage slots
		rawdb.DeleteAccountSnapshot(batch, hash)
		base.cache.Set(hash[:], nil)

		it := rawdb.IterateStorageSnapshots(base.diskdb, hash)
		for it.Next() {
			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
				batch.Delete(key)
				base.cache.Del(key[1:])
				snapshotFlushStorageItemMeter.Mark(1)

				// Ensure we don't delete too much data blindly (contract can be
				// huge). It's ok to flush, the root will go missing in case of a
				// crash and we'll detect and regenerate the snapshot.
				if batch.ValueSize() > ethdb.IdealBatchSize {
					if err := batch.Write(); err != nil {
						log.Crit("Failed to write storage deletions", "err", err)
					}
					batch.Reset()
				}
			}
		}
		it.Release()
	}
	// Push all updated accounts into the database
	for hash, data := range bottom.accountData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Push the account to disk
		rawdb.WriteAccountSnapshot(batch, hash, data)
		base.cache.Set(hash[:], data)
		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))

		snapshotFlushAccountItemMeter.Mark(1)
		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))

		// Ensure we don't write too much data blindly. It's ok to flush, the
		// root will go missing in case of a crash and we'll detect and regen
		// the snapshot.
		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Crit("Failed to write storage deletions", "err", err)
			}
			batch.Reset()
		}
	}
	// Push all the storage slots into the database
	for accountHash, storage := range bottom.storageData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
			continue
		}
		// Generation might be mid-account, track that case too
		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])

		for storageHash, data := range storage {
			// Skip any slot not covered yet by the snapshot
			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
				continue
			}
			if len(data) > 0 {
				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
			} else {
				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
			}
			snapshotFlushStorageItemMeter.Mark(1)
			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
		}
	}
	// Update the snapshot block marker and write any remainder data
	rawdb.WriteSnapshotRoot(batch, bottom.root)

	// Write out the generator progress marker and report
	journalProgress(batch, base.genMarker, stats)

	// Flush all the updates in the single db operation. Ensure the
	// disk layer transition is atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
	res := &diskLayer{
		root:       bottom.root,
		cache:      base.cache,
		diskdb:     base.diskdb,
		triedb:     base.triedb,
		genMarker:  base.genMarker,
		genPending: base.genPending,
	}
	// If snapshot generation hasn't finished yet, port over all the starts and
	// continue where the previous round left off.
	//
	// Note, the `base.genAbort` comparison is not used normally, it's checked
	// to allow the tests to play with the marker without triggering this path.
	if base.genMarker != nil && base.genAbort != nil {
		res.genMarker = base.genMarker
		res.genAbort = make(chan chan *generatorStats)
		go res.generate(stats)
	}
	return res
}

// Journal commits an entire diff hierarchy to disk into a single journal entry.
// This is meant to be used during shutdown to persist the snapshot without
// flattening everything down (bad for reorgs).
//
// The method returns the root hash of the base layer that needs to be persisted
// to disk as a trie too to allow continuing any pending generation op.
func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
	// Retrieve the head snapshot to journal from
	snap := t.Snapshot(root)
	if snap == nil {
		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
	}
	// Run the journaling
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly write out the metadata of the journal
	journal := new(bytes.Buffer)
	if err := rlp.Encode(journal, journalVersion); err != nil {
		return common.Hash{}, err
	}
	diskroot := t.diskRoot()
	if diskroot == (common.Hash{}) {
		return common.Hash{}, errors.New("invalid disk root")
	}
	// Secondly write out the disk layer root, ensure the
	// diff journal is continuous with disk.
	if err := rlp.Encode(journal, diskroot); err != nil {
		return common.Hash{}, err
	}
	// Finally write out the journal of each layer in reverse order.
	base, err := snap.(snapshot).Journal(journal)
	if err != nil {
		return common.Hash{}, err
	}
	// Store the journal into the database and return
	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
	return base, nil
}
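
// Hypothetical shutdown sketch (not part of the original file): journalling the
// diff hierarchy and remembering the returned base root, which must also be
// committed as a trie so a pending generation can resume on the next startup.
//
//	base, err := t.Journal(headRoot)
//	if err == nil {
//		// persist the trie for `base` via the caller's trie database commit path
//		_ = base
//	}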

// Rebuild wipes all available snapshot data from the persistent database and
// discards all caches and diff layers. Afterwards, it starts a new snapshot
// generator with the given root hash.
func (t *Tree) Rebuild(root common.Hash) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly delete any recovery flag in the database, because now we are
	// building a brand new snapshot. Also reenable the snapshot feature.
	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
	rawdb.DeleteSnapshotDisabled(t.diskdb)

	// Iterate over and mark all layers stale
	for _, layer := range t.layers {
		switch layer := layer.(type) {
		case *diskLayer:
			// If the base layer is generating, abort it and save
			if layer.genAbort != nil {
				abort := make(chan *generatorStats)
				layer.genAbort <- abort
				<-abort
			}
			// Layer should be inactive now, mark it as stale
			layer.lock.Lock()
			layer.stale = true
			layer.lock.Unlock()

		case *diffLayer:
			// If the layer is a simple diff, simply mark as stale
			layer.lock.Lock()
			atomic.StoreUint32(&layer.stale, 1)
			layer.lock.Unlock()

		default:
			panic(fmt.Sprintf("unknown layer type: %T", layer))
		}
	}
	// Start generating a new snapshot from scratch on a background thread. The
	// generator will run a wiper first if there's not one running right now.
	log.Info("Rebuilding state snapshot")
	t.layers = map[common.Hash]snapshot{
		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root),
	}
}

// AccountIterator creates a new account iterator for the specified root hash and
// seeks to a starting account hash.
func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
	ok, err := t.generating()
	if err != nil {
		return nil, err
	}
	if ok {
		return nil, ErrNotConstructed
	}
	return newFastAccountIterator(t, root, seek)
}

// StorageIterator creates a new storage iterator for the specified root hash and
// account. The iterator will be moved to the specific start position.
func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
	ok, err := t.generating()
	if err != nil {
		return nil, err
	}
	if ok {
		return nil, ErrNotConstructed
	}
	return newFastStorageIterator(t, root, account, seek)
}
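
// Hypothetical iteration sketch (not part of the original file): walking all
// accounts and, per account, all storage slots in hash order. Both constructors
// fail with ErrNotConstructed while the snapshot is still being generated.
//
//	accIt, err := t.AccountIterator(root, common.Hash{})
//	if err != nil {
//		// snapshot still generating, or root unknown
//	} else {
//		defer accIt.Release()
//		for accIt.Next() {
//			stIt, serr := t.StorageIterator(root, accIt.Hash(), common.Hash{})
//			if serr != nil {
//				continue
//			}
//			for stIt.Next() {
//				_ = stIt.Slot() // raw slot value
//			}
//			stIt.Release()
//		}
//	}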

// Verify iterates the whole state (all the accounts as well as the corresponding storages)
// with the specific root and compares the re-computed hash with the original one.
func (t *Tree) Verify(root common.Hash) error {
	acctIt, err := t.AccountIterator(root, common.Hash{})
	if err != nil {
		return err
	}
	defer acctIt.Release()

	got, err := generateTrieRoot(nil, acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
		storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
		if err != nil {
			return common.Hash{}, err
		}
		defer storageIt.Release()

		hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
		if err != nil {
			return common.Hash{}, err
		}
		return hash, nil
	}, newGenerateStats(), true)

	if err != nil {
		return err
	}
	if got != root {
		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
	}
	return nil
}
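
// Hypothetical sketch (not part of the original file): cross-checking a fully
// generated snapshot against its state root, e.g. from a debug or admin code path.
//
//	if err := t.Verify(headRoot); err != nil {
//		log.Error("Snapshot does not match state root", "err", err)
//	}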

// disklayer is an internal helper function to return the disk layer.
// The lock of snapTree is assumed to be held already.
func (t *Tree) disklayer() *diskLayer {
	var snap snapshot
	for _, s := range t.layers {
		snap = s
		break
	}
	if snap == nil {
		return nil
	}
	switch layer := snap.(type) {
	case *diskLayer:
		return layer
	case *diffLayer:
		return layer.origin
	default:
		panic(fmt.Sprintf("%T: undefined layer", snap))
	}
}

// diskRoot is an internal helper function to return the disk layer root.
// The lock of snapTree is assumed to be held already.
func (t *Tree) diskRoot() common.Hash {
	disklayer := t.disklayer()
	if disklayer == nil {
		return common.Hash{}
	}
	return disklayer.Root()
}

// generating is an internal helper function which reports whether the snapshot
// is still under construction.
func (t *Tree) generating() (bool, error) {
	t.lock.Lock()
	defer t.lock.Unlock()

	layer := t.disklayer()
	if layer == nil {
		return false, errors.New("disk layer is missing")
	}
	layer.lock.RLock()
	defer layer.lock.RUnlock()
	return layer.genMarker != nil, nil
}

// DiskRoot is an external helper function to return the disk layer root.
func (t *Tree) DiskRoot() common.Hash {
	t.lock.Lock()
	defer t.lock.Unlock()

	return t.diskRoot()
}

// transformSnapData converts the address-keyed change sets produced by block
// execution into the hash-keyed form used by the snapshot layers.
// TODO we can further improve it when the set is very large
func transformSnapData(destructs map[common.Address]struct{}, accounts map[common.Address][]byte,
	storage map[common.Address]map[string][]byte) (map[common.Hash]struct{}, map[common.Hash][]byte,
	map[common.Hash]map[common.Hash][]byte) {
	hasher := crypto.NewKeccakState()
	hashDestructs := make(map[common.Hash]struct{}, len(destructs))
	hashAccounts := make(map[common.Hash][]byte, len(accounts))
	hashStorages := make(map[common.Hash]map[common.Hash][]byte, len(storage))

	for addr := range destructs {
		hashDestructs[crypto.Keccak256Hash(addr[:])] = struct{}{}
	}
	for addr, account := range accounts {
		hashAccounts[crypto.Keccak256Hash(addr[:])] = account
	}
	for addr, accountStore := range storage {
		hashStorage := make(map[common.Hash][]byte, len(accountStore))
		for k, v := range accountStore {
			hashStorage[crypto.HashData(hasher, []byte(k))] = v
		}
		hashStorages[crypto.Keccak256Hash(addr[:])] = hashStorage
	}
	return hashDestructs, hashAccounts, hashStorages
}
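
// Hypothetical sketch (not part of the original file): the keying performed by
// transformSnapData for a single entry, assuming `addr` is an address and
// `slotKey` a raw 32-byte slot key from the change set.
//
//	accountKey := crypto.Keccak256Hash(addr[:])     // key used in hashAccounts/hashDestructs
//	slotHashKey := crypto.Keccak256Hash(slotKey[:]) // key used in the per-account storage map
//	_, _ = accountKey, slotHashKey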