snapshot.go
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package snapshot implements a journalled, dynamic state dump.
package snapshot

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/trie"
)
var (
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer had been invalidated due to the chain progressing forward far enough
	// to not maintain the layer's original state.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is being generated currently and the requested data item is not yet in the
	// range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if the callers want to iterate the snapshot
	// while the generation is not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
	// that forms a cycle in the snapshot tree.
	errSnapshotCycle = errors.New("snapshot cycle")
)
// Snapshot represents the functionality supported by a snapshot storage layer.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (*Account, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular hash,
	// within a particular account.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}
// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)
	// LegacyJournal is basically identical to Journal. It's the legacy version for
	// flushing the legacy journal. Now the only purpose of this function is for testing.
	LegacyJournal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer.
	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
}
// Tree is an Ethereum state snapshot tree. It consists of one persistent base
// layer backed by a key-value store, on top of which arbitrarily many in-memory
// diff layers are topped. The memory diffs can form a tree with branching, but
// the disk layer is singleton and common to all. If a reorg goes deeper than
// the disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	diskdb ethdb.KeyValueStore      // Persistent database to store the snapshot
	triedb *trie.Database           // In-memory cache to access the trie through
	cache  int                      // Megabytes permitted to use for read caches
	layers map[common.Hash]snapshot // Collection of all known layers
	lock   sync.RWMutex
}
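// A minimal lifecycle sketch (illustrative only; the surrounding chain wiring
// and the names diskdb, triedb, headRoot, newRoot, parentRoot, destructs,
// accounts and storage are assumptions, not part of this package):
//
//	snaps := snapshot.New(diskdb, triedb, 256, headRoot, false, false)
//	// After executing a block, link its post-state as a new diff layer on top
//	// of the parent block's snapshot:
//	if err := snaps.Update(newRoot, parentRoot, destructs, accounts, storage); err != nil {
//		log.Error("Failed to update snapshot tree", "err", err)
//	}
//	// Periodically Cap the diff tree to bound memory, and Journal it at shutdown.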
// New attempts to load an already existing snapshot from a persistent key-value
// store (with a number of memory layers from a journal), ensuring that the head
// of the snapshot matches the expected one.
//
// If the snapshot is missing or the disk layer is broken, the entire snapshot
// is deleted and will be reconstructed from scratch based on the tries in the
// key-value store, on a background thread. If the memory layers from the journal
// are not continuous with the disk layer, or the journal is missing, all diffs
// will be discarded iff it's in "recovery" mode, otherwise a rebuild is mandatory.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree {
	// Create a new, empty snapshot tree
	snap := &Tree{
		diskdb: diskdb,
		triedb: triedb,
		cache:  cache,
		layers: make(map[common.Hash]snapshot),
	}
	if !async {
		defer snap.waitBuild()
	}
	// Attempt to load a previously persisted snapshot and rebuild one if failed
	head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
	if err != nil {
		log.Warn("Failed to load snapshot, regenerating", "err", err)
		snap.Rebuild(root)
		return snap
	}
	// Existing snapshot loaded, seed all the layers
	for head != nil {
		snap.layers[head.Root()] = head
		head = head.Parent()
	}
	return snap
}
// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
// to be used by tests to ensure we're testing what we believe we are.
func (t *Tree) waitBuild() {
	// Find the rebuild termination channel
	var done chan struct{}

	t.lock.RLock()
	for _, layer := range t.layers {
		if layer, ok := layer.(*diskLayer); ok {
			done = layer.genPending
			break
		}
	}
	t.lock.RUnlock()

	// Wait until the snapshot is generated
	if done != nil {
		<-done
	}
}

// Snapshot retrieves a snapshot belonging to the given block root, or nil if no
// snapshot is maintained for that block.
func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
	t.lock.RLock()
	defer t.lock.RUnlock()

	return t.layers[blockRoot]
}
// Update adds a new snapshot into the tree, if that can be linked to an existing
// old parent. It is disallowed to insert a disk layer (the origin of all).
func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
	// Reject noop updates to avoid self-loops in the snapshot tree. This is a
	// special case that can only happen for Clique networks where empty blocks
	// don't modify the state (0 block subsidy).
	//
	// Although we could silently ignore this internally, it should be the caller's
	// responsibility to avoid even attempting to insert such a snapshot.
	if blockRoot == parentRoot {
		return errSnapshotCycle
	}
	// Generate a new snapshot on top of the parent. Note the nil check must come
	// before the type assertion, otherwise a missing parent panics instead of
	// returning an error.
	parent := t.Snapshot(parentRoot)
	if parent == nil {
		return fmt.Errorf("parent [%#x] snapshot missing", parentRoot)
	}
	snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage)

	// Save the new snapshot for later
	t.lock.Lock()
	defer t.lock.Unlock()

	t.layers[snap.root] = snap
	return nil
}
// Cap traverses downwards the snapshot tree from a head block hash until the
// number of allowed layers is crossed. All layers beyond the permitted number
// are flattened downwards.
func (t *Tree) Cap(root common.Hash, layers int) error {
	// Retrieve the head snapshot to cap from
	snap := t.Snapshot(root)
	if snap == nil {
		return fmt.Errorf("snapshot [%#x] missing", root)
	}
	diff, ok := snap.(*diffLayer)
	if !ok {
		return fmt.Errorf("snapshot [%#x] is disk layer", root)
	}
	// If the generator is still running, use a more aggressive cap
	diff.origin.lock.RLock()
	if diff.origin.genMarker != nil && layers > 8 {
		layers = 8
	}
	diff.origin.lock.RUnlock()

	// Run the internal capping and discard all stale layers
	t.lock.Lock()
	defer t.lock.Unlock()

	// Flattening the bottom-most diff layer requires special casing since there's
	// no child to rewire to the grandparent. In that case we can fake a temporary
	// child for the capping and then remove it.
	var persisted *diskLayer

	switch layers {
	case 0:
		// If full commit was requested, flatten the diffs and merge onto disk
		diff.lock.RLock()
		base := diffToDisk(diff.flatten().(*diffLayer))
		diff.lock.RUnlock()

		// Replace the entire snapshot tree with the flat base
		t.layers = map[common.Hash]snapshot{base.root: base}
		return nil

	case 1:
		// If full flattening was requested, flatten the diffs but only merge if the
		// memory limit was reached
		var (
			bottom *diffLayer
			base   *diskLayer
		)
		diff.lock.RLock()
		bottom = diff.flatten().(*diffLayer)
		if bottom.memory >= aggregatorMemoryLimit {
			base = diffToDisk(bottom)
		}
		diff.lock.RUnlock()

		// If all diff layers were removed, replace the entire snapshot tree
		if base != nil {
			t.layers = map[common.Hash]snapshot{base.root: base}
			return nil
		}
		// Merge the new aggregated layer into the snapshot tree, clean stales below
		t.layers[bottom.root] = bottom

	default:
		// Many layers requested to be retained, cap normally
		persisted = t.cap(diff, layers)
	}
	// Remove any layer that is stale or links into a stale layer
	children := make(map[common.Hash][]common.Hash)
	for root, snap := range t.layers {
		if diff, ok := snap.(*diffLayer); ok {
			parent := diff.parent.Root()
			children[parent] = append(children[parent], root)
		}
	}
	var remove func(root common.Hash)
	remove = func(root common.Hash) {
		delete(t.layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	for root, snap := range t.layers {
		if snap.Stale() {
			remove(root)
		}
	}
	// If the disk layer was modified, regenerate all the cumulative blooms
	if persisted != nil {
		var rebloom func(root common.Hash)
		rebloom = func(root common.Hash) {
			if diff, ok := t.layers[root].(*diffLayer); ok {
				diff.rebloom(persisted)
			}
			for _, child := range children[root] {
				rebloom(child)
			}
		}
		rebloom(persisted.root)
	}
	return nil
}
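// A hedged illustration of driving Cap from a caller (the 128-layer retention
// figure and the variable names are assumptions for the example only):
//
//	// Keep at most 128 diff layers above the persistent base; anything deeper
//	// is flattened downwards, and the accumulator is written to disk once it
//	// grows past the aggregator memory limit.
//	if err := snaps.Cap(currentRoot, 128); err != nil {
//		log.Error("Failed to cap snapshot tree", "root", currentRoot, "err", err)
//	}
//
// As the switch above shows, layers == 0 flattens everything onto disk, while
// layers == 1 keeps a single aggregated diff layer in memory unless it exceeds
// the memory limit.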
// cap traverses downwards the diff tree until the number of allowed layers is
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it.
func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
	// Dive until we run out of layers or reach the persistent database
	for ; layers > 2; layers-- {
		// If we still have diff layers below, continue down
		if parent, ok := diff.parent.(*diffLayer); ok {
			diff = parent
		} else {
			// Diff stack too shallow, return without modifications
			return nil
		}
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := diff.parent.(type) {
	case *diskLayer:
		return nil

	case *diffLayer:
		// Flatten the parent into the grandparent. The flattening internally obtains a
		// write lock on grandparent.
		flattened := parent.flatten().(*diffLayer)
		t.layers[flattened.root] = flattened

		diff.lock.Lock()
		defer diff.lock.Unlock()

		diff.parent = flattened
		if flattened.memory < aggregatorMemoryLimit {
			// Accumulator layer is smaller than the limit, so we can abort, unless
			// there's a snapshot being generated currently. In that case, the trie
			// will move from underneath the generator so we **must** merge all the
			// partial data down into the snapshot and restart the generation.
			if flattened.parent.(*diskLayer).genAbort == nil {
				return nil
			}
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottom-most layer is larger than our memory cap, persist to disk
	bottom := diff.parent.(*diffLayer)

	bottom.lock.RLock()
	base := diffToDisk(bottom)
	bottom.lock.RUnlock()

	t.layers[base.root] = base
	diff.parent = base
	return base
}
// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called onto a non-bottom-most diff layer.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished.
func diffToDisk(bottom *diffLayer) *diskLayer {
	var (
		base  = bottom.parent.(*diskLayer)
		batch = base.diskdb.NewBatch()
		stats *generatorStats
	)
	// If the disk layer is running a snapshot generator, abort it
	if base.genAbort != nil {
		abort := make(chan *generatorStats)
		base.genAbort <- abort
		stats = <-abort
	}
	// Put the deletion in the batch writer, flush all updates in the final step.
	rawdb.DeleteSnapshotRoot(batch)

	// Mark the original base as stale as we're going to create a new wrapper
	base.lock.Lock()
	if base.stale {
		panic("parent disk layer is stale") // we've committed into the same base from two children, boo
	}
	base.stale = true
	base.lock.Unlock()

	// Destroy all the destructed accounts from the database
	for hash := range bottom.destructSet {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Remove all storage slots
		rawdb.DeleteAccountSnapshot(batch, hash)
		base.cache.Set(hash[:], nil)

		it := rawdb.IterateStorageSnapshots(base.diskdb, hash)
		for it.Next() {
			if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
				batch.Delete(key)
				base.cache.Del(key[1:])

				snapshotFlushStorageItemMeter.Mark(1)
			}
		}
		it.Release()
	}
	// Push all updated accounts into the database
	for hash, data := range bottom.accountData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 {
			continue
		}
		// Push the account to disk
		rawdb.WriteAccountSnapshot(batch, hash, data)
		base.cache.Set(hash[:], data)
		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))

		snapshotFlushAccountItemMeter.Mark(1)
		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
	}
	// Push all the storage slots into the database
	for accountHash, storage := range bottom.storageData {
		// Skip any account not covered yet by the snapshot
		if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 {
			continue
		}
		// Generation might be mid-account, track that case too
		midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength])

		for storageHash, data := range storage {
			// Skip any slot not covered yet by the snapshot
			if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 {
				continue
			}
			if len(data) > 0 {
				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
				base.cache.Set(append(accountHash[:], storageHash[:]...), data)
				snapshotCleanStorageWriteMeter.Mark(int64(len(data)))
			} else {
				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
				base.cache.Set(append(accountHash[:], storageHash[:]...), nil)
			}
			snapshotFlushStorageItemMeter.Mark(1)
			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
		}
	}
	// Update the snapshot block marker and write any remainder data
	rawdb.WriteSnapshotRoot(batch, bottom.root)

	// Write out the generator marker
	entry := journalGenerator{
		Done:   base.genMarker == nil,
		Marker: base.genMarker,
	}
	if stats != nil {
		entry.Wiping = (stats.wiping != nil)
		entry.Accounts = stats.accounts
		entry.Slots = stats.slots
		entry.Storage = uint64(stats.storage)
	}
	blob, err := rlp.EncodeToBytes(entry)
	if err != nil {
		panic(fmt.Sprintf("Failed to RLP encode generator %v", err))
	}
	rawdb.WriteSnapshotGenerator(batch, blob)

	// Flush all the updates in the single db operation. Ensure the
	// disk layer transition is atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
	res := &diskLayer{
		root:       bottom.root,
		cache:      base.cache,
		diskdb:     base.diskdb,
		triedb:     base.triedb,
		genMarker:  base.genMarker,
		genPending: base.genPending,
	}
	// If snapshot generation hasn't finished yet, port over all the starts and
	// continue where the previous round left off.
	//
	// Note, the `base.genAbort` comparison is not used normally, it's checked
	// to allow the tests to play with the marker without triggering this path.
	if base.genMarker != nil && base.genAbort != nil {
		res.genMarker = base.genMarker
		res.genAbort = make(chan chan *generatorStats)
		go res.generate(stats)
	}
	return res
}
// Journal commits an entire diff hierarchy to disk into a single journal entry.
// This is meant to be used during shutdown to persist the snapshot without
// flattening everything down (bad for reorgs).
//
// The method returns the root hash of the base layer that needs to be persisted
// to disk as a trie too to allow continuing any pending generation op.
func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
	// Retrieve the head snapshot to journal from
	snap := t.Snapshot(root)
	if snap == nil {
		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
	}
	// Run the journaling
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly write out the metadata of journal
	journal := new(bytes.Buffer)
	if err := rlp.Encode(journal, journalVersion); err != nil {
		return common.Hash{}, err
	}
	diskroot := t.diskRoot()
	if diskroot == (common.Hash{}) {
		return common.Hash{}, errors.New("invalid disk root")
	}
	// Secondly write out the disk layer root, ensure the
	// diff journal is continuous with disk.
	if err := rlp.Encode(journal, diskroot); err != nil {
		return common.Hash{}, err
	}
	// Finally write out the journal of each layer in reverse order.
	base, err := snap.(snapshot).Journal(journal)
	if err != nil {
		return common.Hash{}, err
	}
	// Store the journal into the database and return
	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
	return base, nil
}
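// A hedged sketch of shutdown handling (illustrative only; the surrounding
// shutdown hook and the snaps/headRoot names are assumptions): during a clean
// shutdown the caller journals the diff layers instead of flattening them, so
// reorgs across a restart stay cheap.
//
//	if base, err := snaps.Journal(headRoot); err != nil {
//		log.Error("Failed to journal state snapshot", "err", err)
//	} else {
//		// The returned base root also needs to be persisted as a trie so any
//		// pending generation can resume from it after the restart.
//		log.Info("Persisted snapshot journal", "base", base)
//	}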
// LegacyJournal is basically identical to Journal. It's the legacy version for
// flushing the legacy journal. Now the only purpose of this function is for testing.
func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) {
	// Retrieve the head snapshot to journal from
	snap := t.Snapshot(root)
	if snap == nil {
		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
	}
	// Run the journaling
	t.lock.Lock()
	defer t.lock.Unlock()

	journal := new(bytes.Buffer)
	base, err := snap.(snapshot).LegacyJournal(journal)
	if err != nil {
		return common.Hash{}, err
	}
	// Store the journal into the database and return
	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
	return base, nil
}
// Rebuild wipes all available snapshot data from the persistent database and
// discards all caches and diff layers. Afterwards, it starts a new snapshot
// generator with the given root hash.
func (t *Tree) Rebuild(root common.Hash) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly delete any recovery flag in the database. Because now we are
	// building a brand new snapshot.
	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)

	// Track whether there's a wipe currently running and keep it alive if so
	var wiper chan struct{}

	// Iterate over and mark all layers stale
	for _, layer := range t.layers {
		switch layer := layer.(type) {
		case *diskLayer:
			// If the base layer is generating, abort it and save
			if layer.genAbort != nil {
				abort := make(chan *generatorStats)
				layer.genAbort <- abort

				if stats := <-abort; stats != nil {
					wiper = stats.wiping
				}
			}
			// Layer should be inactive now, mark it as stale
			layer.lock.Lock()
			layer.stale = true
			layer.lock.Unlock()

		case *diffLayer:
			// If the layer is a simple diff, simply mark as stale
			layer.lock.Lock()
			atomic.StoreUint32(&layer.stale, 1)
			layer.lock.Unlock()

		default:
			panic(fmt.Sprintf("unknown layer type: %T", layer))
		}
	}
	// Start generating a new snapshot from scratch on a background thread. The
	// generator will run a wiper first if there's not one running right now.
	log.Info("Rebuilding state snapshot")
	t.layers = map[common.Hash]snapshot{
		root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper),
	}
}
// AccountIterator creates a new account iterator for the specified root hash and
// seeks to a starting account hash.
func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
	ok, err := t.generating()
	if err != nil {
		return nil, err
	}
	if ok {
		return nil, ErrNotConstructed
	}
	return newFastAccountIterator(t, root, seek)
}

// StorageIterator creates a new storage iterator for the specified root hash and
// account. The iterator will be moved to the specific start position.
func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
	ok, err := t.generating()
	if err != nil {
		return nil, err
	}
	if ok {
		return nil, ErrNotConstructed
	}
	return newFastStorageIterator(t, root, account, seek)
}
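// A hedged usage sketch for the iterators (illustrative only; iteration can
// start only once generation has finished, otherwise ErrNotConstructed is
// returned, and process is a hypothetical callback):
//
//	it, err := snaps.AccountIterator(headRoot, common.Hash{})
//	if err != nil {
//		return err
//	}
//	defer it.Release()
//	for it.Next() {
//		// it.Hash() is the account hash, it.Account() the slim-RLP account blob.
//		process(it.Hash(), it.Account())
//	}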
// disklayer is an internal helper function to return the disk layer.
// The lock of snapTree is assumed to be held already.
func (t *Tree) disklayer() *diskLayer {
	var snap snapshot
	for _, s := range t.layers {
		snap = s
		break
	}
	if snap == nil {
		return nil
	}
	switch layer := snap.(type) {
	case *diskLayer:
		return layer
	case *diffLayer:
		return layer.origin
	default:
		panic(fmt.Sprintf("%T: undefined layer", snap))
	}
}

// diskRoot is an internal helper function to return the disk layer root.
// The lock of snapTree is assumed to be held already.
func (t *Tree) diskRoot() common.Hash {
	disklayer := t.disklayer()
	if disklayer == nil {
		return common.Hash{}
	}
	return disklayer.Root()
}

// generating is an internal helper function which reports whether the snapshot
// is still under construction.
func (t *Tree) generating() (bool, error) {
	t.lock.Lock()
	defer t.lock.Unlock()

	layer := t.disklayer()
	if layer == nil {
		return false, errors.New("disk layer is missing")
	}
	layer.lock.RLock()
	defer layer.lock.RUnlock()

	return layer.genMarker != nil, nil
}

// DiskRoot is an external helper function to return the disk layer root.
func (t *Tree) DiskRoot() common.Hash {
	t.lock.Lock()
	defer t.lock.Unlock()

	return t.diskRoot()
}