difflayer.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. // Copyright 2019 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package snapshot
  17. import (
  18. "fmt"
  19. "sort"
  20. "sync"
  21. "github.com/ethereum/go-ethereum/common"
  22. "github.com/ethereum/go-ethereum/core/rawdb"
  23. "github.com/ethereum/go-ethereum/ethdb"
  24. "github.com/ethereum/go-ethereum/log"
  25. "github.com/ethereum/go-ethereum/rlp"
  26. )
// diffLayer represents a collection of modifications made to a state snapshot
// after running a block on top. It contains one sorted list for the account trie
// and one list for each storage trie.
//
// The goal of a diff layer is to act as a journal, tracking recent modifications
// made to the state, that have not yet graduated into a semi-immutable state.
type diffLayer struct {
	parent snapshot    // Parent snapshot modified by this one, never nil
	memory uint64      // Approximate guess as to how much memory we use
	number uint64      // Block number to which this snapshot diff belongs to
	root   common.Hash // Root hash to which this snapshot diff belongs to

	accountList   []common.Hash          // List of accounts for iteration, might not be sorted yet (lazy)
	accountSorted bool                   // Flag whether the account list has already been sorted or not
	accountData   map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted)

	storageList   map[common.Hash][]common.Hash          // List of storage slots for iterated retrievals, one per account
	storageSorted map[common.Hash]bool                   // Flag whether the storage slot list has already been sorted or not
	storageData   map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval, one per account (nil means deleted)

	lock sync.RWMutex // Guards the fields above against concurrent read/write access
}
  46. // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low
  47. // level persistent database or a hierarchical diff already.
  48. func newDiffLayer(parent snapshot, number uint64, root common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
  49. // Create the new layer with some pre-allocated data segments
  50. dl := &diffLayer{
  51. parent: parent,
  52. number: number,
  53. root: root,
  54. accountData: accounts,
  55. storageData: storage,
  56. }
  57. // Fill the account hashes and sort them for the iterator
  58. accountList := make([]common.Hash, 0, len(accounts))
  59. for hash, data := range accounts {
  60. accountList = append(accountList, hash)
  61. dl.memory += uint64(len(data))
  62. }
  63. sort.Sort(hashes(accountList))
  64. dl.accountList = accountList
  65. dl.accountSorted = true
  66. dl.memory += uint64(len(dl.accountList) * common.HashLength)
  67. // Fill the storage hashes and sort them for the iterator
  68. dl.storageList = make(map[common.Hash][]common.Hash, len(storage))
  69. dl.storageSorted = make(map[common.Hash]bool, len(storage))
  70. for accountHash, slots := range storage {
  71. // If the slots are nil, sanity check that it's a deleted account
  72. if slots == nil {
  73. // Ensure that the account was just marked as deleted
  74. if account, ok := accounts[accountHash]; account != nil || !ok {
  75. panic(fmt.Sprintf("storage in %#x nil, but account conflicts (%#x, exists: %v)", accountHash, account, ok))
  76. }
  77. // Everything ok, store the deletion mark and continue
  78. dl.storageList[accountHash] = nil
  79. continue
  80. }
  81. // Storage slots are not nil so entire contract was not deleted, ensure the
  82. // account was just updated.
  83. if account, ok := accounts[accountHash]; account == nil || !ok {
  84. log.Error(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok))
  85. //panic(fmt.Sprintf("storage in %#x exists, but account nil (exists: %v)", accountHash, ok))
  86. }
  87. // Fill the storage hashes for this account and sort them for the iterator
  88. storageList := make([]common.Hash, 0, len(slots))
  89. for storageHash, data := range slots {
  90. storageList = append(storageList, storageHash)
  91. dl.memory += uint64(len(data))
  92. }
  93. sort.Sort(hashes(storageList))
  94. dl.storageList[accountHash] = storageList
  95. dl.storageSorted[accountHash] = true
  96. dl.memory += uint64(len(storageList) * common.HashLength)
  97. }
  98. dl.memory += uint64(len(dl.storageList) * common.HashLength)
  99. return dl
  100. }
  101. // Info returns the block number and root hash for which this snapshot was made.
  102. func (dl *diffLayer) Info() (uint64, common.Hash) {
  103. return dl.number, dl.root
  104. }
  105. // Account directly retrieves the account associated with a particular hash in
  106. // the snapshot slim data format.
  107. func (dl *diffLayer) Account(hash common.Hash) *Account {
  108. data := dl.AccountRLP(hash)
  109. if len(data) == 0 { // can be both nil and []byte{}
  110. return nil
  111. }
  112. account := new(Account)
  113. if err := rlp.DecodeBytes(data, account); err != nil {
  114. panic(err)
  115. }
  116. return account
  117. }
  118. // AccountRLP directly retrieves the account RLP associated with a particular
  119. // hash in the snapshot slim data format.
  120. func (dl *diffLayer) AccountRLP(hash common.Hash) []byte {
  121. dl.lock.RLock()
  122. defer dl.lock.RUnlock()
  123. // If the account is known locally, return it. Note, a nil account means it was
  124. // deleted, and is a different notion than an unknown account!
  125. if data, ok := dl.accountData[hash]; ok {
  126. return data
  127. }
  128. // Account unknown to this diff, resolve from parent
  129. return dl.parent.AccountRLP(hash)
  130. }
  131. // Storage directly retrieves the storage data associated with a particular hash,
  132. // within a particular account. If the slot is unknown to this diff, it's parent
  133. // is consulted.
  134. func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) []byte {
  135. dl.lock.RLock()
  136. defer dl.lock.RUnlock()
  137. // If the account is known locally, try to resolve the slot locally. Note, a nil
  138. // account means it was deleted, and is a different notion than an unknown account!
  139. if storage, ok := dl.storageData[accountHash]; ok {
  140. if storage == nil {
  141. return nil
  142. }
  143. if data, ok := storage[storageHash]; ok {
  144. return data
  145. }
  146. }
  147. // Account - or slot within - unknown to this diff, resolve from parent
  148. return dl.parent.Storage(accountHash, storageHash)
  149. }
  150. // Update creates a new layer on top of the existing snapshot diff tree with
  151. // the specified data items.
  152. func (dl *diffLayer) Update(blockRoot common.Hash, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer {
  153. return newDiffLayer(dl, dl.number+1, blockRoot, accounts, storage)
  154. }
// Cap traverses downwards the diff tree until the number of allowed layers are
// crossed. All diffs beyond the permitted number are flattened downwards. If
// the layer limit is reached, memory cap is also enforced (but not before). The
// block numbers for the disk layer and first diff layer are returned for GC.
func (dl *diffLayer) Cap(layers int, memory uint64) (uint64, uint64) {
	// Dive until we run out of layers or reach the persistent database. The
	// recursion keeps the topmost layers intact, only the bottom is squashed.
	if layers > 2 {
		// If we still have diff layers below, recurse
		if parent, ok := dl.parent.(*diffLayer); ok {
			return parent.Cap(layers-1, memory)
		}
		// Diff stack too shallow, return block numbers without modifications
		return dl.parent.(*diskLayer).number, dl.number
	}
	// We're out of layers, flatten anything below, stopping if it's the disk or if
	// the memory limit is not yet exceeded.
	switch parent := dl.parent.(type) {
	case *diskLayer:
		// Already sitting directly on the database, nothing to flatten
		return parent.number, dl.number
	case *diffLayer:
		// Hold the write lock until the flattened parent is either retained in
		// memory or persisted below; the deferred unlock fires at function return
		dl.lock.Lock()
		defer dl.lock.Unlock()
		// Flatten everything below this layer into a single combined diff
		dl.parent = parent.flatten()
		if dl.parent.(*diffLayer).memory < memory {
			// Memory cap not exceeded, keep the flattened diff in RAM.
			// NOTE(review): `parent` is the pre-flatten layer; this assumes its
			// number matches the flattened result's — confirm against flatten()
			diskNumber, _ := parent.parent.Info()
			return diskNumber, parent.number
		}
	default:
		panic(fmt.Sprintf("unknown data layer: %T", parent))
	}
	// If the bottommost layer is larger than our memory cap, persist to disk
	var (
		parent = dl.parent.(*diffLayer) // the freshly flattened bottom diff
		base   = parent.parent.(*diskLayer)
		batch  = base.db.NewBatch()
	)
	parent.lock.RLock()
	defer parent.lock.RUnlock()

	// Start by temporarily deleting the current snapshot block marker. This
	// ensures that in the case of a crash, the entire snapshot is invalidated.
	rawdb.DeleteSnapshotBlock(batch)

	// Push all the accounts into the database
	for hash, data := range parent.accountData {
		if len(data) > 0 {
			// Account was updated, push to disk
			rawdb.WriteAccountSnapshot(batch, hash, data)
			base.cache.Set(string(hash[:]), data)
			// Flush in chunks to bound the batch's memory usage
			if batch.ValueSize() > ethdb.IdealBatchSize {
				if err := batch.Write(); err != nil {
					log.Crit("Failed to write account snapshot", "err", err)
				}
				batch.Reset()
			}
		} else {
			// Account was deleted, remove all storage slots too
			rawdb.DeleteAccountSnapshot(batch, hash)
			base.cache.Set(string(hash[:]), nil)
			// Sweep any on-disk slots belonging to the deleted account
			it := rawdb.IterateStorageSnapshots(base.db, hash)
			for it.Next() {
				if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator
					batch.Delete(key)
					base.cache.Delete(string(key[1:]))
				}
			}
			it.Release()
		}
	}
	// Push all the storage slots into the database
	for accountHash, storage := range parent.storageData {
		for storageHash, data := range storage {
			// An empty blob marks the slot as deleted on disk
			if len(data) > 0 {
				rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data)
				base.cache.Set(string(append(accountHash[:], storageHash[:]...)), data)
			} else {
				rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash)
				base.cache.Set(string(append(accountHash[:], storageHash[:]...)), nil)
			}
		}
		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Crit("Failed to write storage snapshot", "err", err)
			}
			batch.Reset()
		}
	}
	// Update the snapshot block marker and write any remainder data
	base.number, base.root = parent.number, parent.root
	rawdb.WriteSnapshotBlock(batch, base.number, base.root)
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	// Reparent this layer directly onto the disk layer, dropping the persisted diff
	dl.parent = base
	return base.number, dl.number
}
// flatten pushes all data from this point downwards, flattening everything into
// a single diff at the bottom. Since usually the lowermost diff is the largest,
// the flattening builds up from there in reverse.
//
// Note that the merge mutates the parent layer's maps and lists in place, so a
// flattened parent must not be used independently afterwards.
func (dl *diffLayer) flatten() snapshot {
	// If the parent is not diff, we're the first in line, return unmodified
	parent, ok := dl.parent.(*diffLayer)
	if !ok {
		return dl
	}
	// Parent is a diff, flatten it first (note, apart from weird corner cases,
	// flatten will realistically only ever merge 1 layer, so there's no need to
	// be smarter about grouping flattens together).
	parent = parent.flatten().(*diffLayer)

	// Overwrite all the updated accounts blindly, merge the sorted list
	for hash, data := range dl.accountData {
		parent.accountData[hash] = data
	}
	parent.accountList = append(parent.accountList, dl.accountList...) // TODO(karalabe): dedup!!
	parent.accountSorted = false

	// Overwrite all the updated storage slots (individually)
	for accountHash, storage := range dl.storageData {
		// If storage didn't exist (or was deleted) in the parent; or if the storage
		// was freshly deleted in the child, overwrite blindly
		if parent.storageData[accountHash] == nil || storage == nil {
			parent.storageList[accountHash] = dl.storageList[accountHash]
			parent.storageData[accountHash] = storage
			continue
		}
		// Storage exists in both parent and child, merge the slots (child wins
		// on conflicting keys since it is the more recent state)
		comboData := parent.storageData[accountHash]
		for storageHash, data := range storage {
			comboData[storageHash] = data
		}
		parent.storageData[accountHash] = comboData
		parent.storageList[accountHash] = append(parent.storageList[accountHash], dl.storageList[accountHash]...) // TODO(karalabe): dedup!!
		parent.storageSorted[accountHash] = false
	}
	// Return the combo parent, stamped with this layer's number and root
	parent.number = dl.number
	parent.root = dl.root
	parent.memory += dl.memory
	return parent
}
  292. // Journal commits an entire diff hierarchy to disk into a single journal file.
  293. // This is meant to be used during shutdown to persist the snapshot without
  294. // flattening everything down (bad for reorgs).
  295. func (dl *diffLayer) Journal() error {
  296. dl.lock.RLock()
  297. defer dl.lock.RUnlock()
  298. writer, err := dl.journal()
  299. if err != nil {
  300. return err
  301. }
  302. writer.Close()
  303. return nil
  304. }