database.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. // Copyright 2018 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package trie
  17. import (
  18. "sync"
  19. "time"
  20. "github.com/ethereum/go-ethereum/common"
  21. "github.com/ethereum/go-ethereum/ethdb"
  22. "github.com/ethereum/go-ethereum/log"
  23. )
  24. // secureKeyPrefix is the database key prefix used to store trie node preimages.
  25. var secureKeyPrefix = []byte("secure-key-")
  26. // secureKeyLength is the length of the above prefix + 32byte hash.
  27. const secureKeyLength = 11 + 32
  28. // DatabaseReader wraps the Get and Has method of a backing store for the trie.
  29. type DatabaseReader interface {
  30. // Get retrieves the value associated with key form the database.
  31. Get(key []byte) (value []byte, err error)
  32. // Has retrieves whether a key is present in the database.
  33. Has(key []byte) (bool, error)
  34. }
  35. // Database is an intermediate write layer between the trie data structures and
  36. // the disk database. The aim is to accumulate trie writes in-memory and only
  37. // periodically flush a couple tries to disk, garbage collecting the remainder.
  38. type Database struct {
  39. diskdb ethdb.Database // Persistent storage for matured trie nodes
  40. nodes map[common.Hash]*cachedNode // Data and references relationships of a node
  41. preimages map[common.Hash][]byte // Preimages of nodes from the secure trie
  42. seckeybuf [secureKeyLength]byte // Ephemeral buffer for calculating preimage keys
  43. gctime time.Duration // Time spent on garbage collection since last commit
  44. gcnodes uint64 // Nodes garbage collected since last commit
  45. gcsize common.StorageSize // Data storage garbage collected since last commit
  46. nodesSize common.StorageSize // Storage size of the nodes cache
  47. preimagesSize common.StorageSize // Storage size of the preimages cache
  48. lock sync.RWMutex
  49. }
  50. // cachedNode is all the information we know about a single cached node in the
  51. // memory database write layer.
  52. type cachedNode struct {
  53. blob []byte // Cached data block of the trie node
  54. parents int // Number of live nodes referencing this one
  55. children map[common.Hash]int // Children referenced by this nodes
  56. }
  57. // NewDatabase creates a new trie database to store ephemeral trie content before
  58. // its written out to disk or garbage collected.
  59. func NewDatabase(diskdb ethdb.Database) *Database {
  60. return &Database{
  61. diskdb: diskdb,
  62. nodes: map[common.Hash]*cachedNode{
  63. {}: {children: make(map[common.Hash]int)},
  64. },
  65. preimages: make(map[common.Hash][]byte),
  66. }
  67. }
  68. // DiskDB retrieves the persistent storage backing the trie database.
  69. func (db *Database) DiskDB() DatabaseReader {
  70. return db.diskdb
  71. }
  72. // Insert writes a new trie node to the memory database if it's yet unknown. The
  73. // method will make a copy of the slice.
  74. func (db *Database) Insert(hash common.Hash, blob []byte) {
  75. db.lock.Lock()
  76. defer db.lock.Unlock()
  77. db.insert(hash, blob)
  78. }
  79. // insert is the private locked version of Insert.
  80. func (db *Database) insert(hash common.Hash, blob []byte) {
  81. if _, ok := db.nodes[hash]; ok {
  82. return
  83. }
  84. db.nodes[hash] = &cachedNode{
  85. blob: common.CopyBytes(blob),
  86. children: make(map[common.Hash]int),
  87. }
  88. db.nodesSize += common.StorageSize(common.HashLength + len(blob))
  89. }
  90. // insertPreimage writes a new trie node pre-image to the memory database if it's
  91. // yet unknown. The method will make a copy of the slice.
  92. //
  93. // Note, this method assumes that the database's lock is held!
  94. func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
  95. if _, ok := db.preimages[hash]; ok {
  96. return
  97. }
  98. db.preimages[hash] = common.CopyBytes(preimage)
  99. db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
  100. }
  101. // Node retrieves a cached trie node from memory. If it cannot be found cached,
  102. // the method queries the persistent database for the content.
  103. func (db *Database) Node(hash common.Hash) ([]byte, error) {
  104. // Retrieve the node from cache if available
  105. db.lock.RLock()
  106. node := db.nodes[hash]
  107. db.lock.RUnlock()
  108. if node != nil {
  109. return node.blob, nil
  110. }
  111. // Content unavailable in memory, attempt to retrieve from disk
  112. return db.diskdb.Get(hash[:])
  113. }
  114. // preimage retrieves a cached trie node pre-image from memory. If it cannot be
  115. // found cached, the method queries the persistent database for the content.
  116. func (db *Database) preimage(hash common.Hash) ([]byte, error) {
  117. // Retrieve the node from cache if available
  118. db.lock.RLock()
  119. preimage := db.preimages[hash]
  120. db.lock.RUnlock()
  121. if preimage != nil {
  122. return preimage, nil
  123. }
  124. // Content unavailable in memory, attempt to retrieve from disk
  125. return db.diskdb.Get(db.secureKey(hash[:]))
  126. }
  127. // secureKey returns the database key for the preimage of key, as an ephemeral
  128. // buffer. The caller must not hold onto the return value because it will become
  129. // invalid on the next call.
  130. func (db *Database) secureKey(key []byte) []byte {
  131. buf := append(db.seckeybuf[:0], secureKeyPrefix...)
  132. buf = append(buf, key...)
  133. return buf
  134. }
  135. // Nodes retrieves the hashes of all the nodes cached within the memory database.
  136. // This method is extremely expensive and should only be used to validate internal
  137. // states in test code.
  138. func (db *Database) Nodes() []common.Hash {
  139. db.lock.RLock()
  140. defer db.lock.RUnlock()
  141. var hashes = make([]common.Hash, 0, len(db.nodes))
  142. for hash := range db.nodes {
  143. if hash != (common.Hash{}) { // Special case for "root" references/nodes
  144. hashes = append(hashes, hash)
  145. }
  146. }
  147. return hashes
  148. }
  149. // Reference adds a new reference from a parent node to a child node.
  150. func (db *Database) Reference(child common.Hash, parent common.Hash) {
  151. db.lock.RLock()
  152. defer db.lock.RUnlock()
  153. db.reference(child, parent)
  154. }
  155. // reference is the private locked version of Reference.
  156. func (db *Database) reference(child common.Hash, parent common.Hash) {
  157. // If the node does not exist, it's a node pulled from disk, skip
  158. node, ok := db.nodes[child]
  159. if !ok {
  160. return
  161. }
  162. // If the reference already exists, only duplicate for roots
  163. if _, ok = db.nodes[parent].children[child]; ok && parent != (common.Hash{}) {
  164. return
  165. }
  166. node.parents++
  167. db.nodes[parent].children[child]++
  168. }
  169. // Dereference removes an existing reference from a parent node to a child node.
  170. func (db *Database) Dereference(child common.Hash, parent common.Hash) {
  171. db.lock.Lock()
  172. defer db.lock.Unlock()
  173. nodes, storage, start := len(db.nodes), db.nodesSize, time.Now()
  174. db.dereference(child, parent)
  175. db.gcnodes += uint64(nodes - len(db.nodes))
  176. db.gcsize += storage - db.nodesSize
  177. db.gctime += time.Since(start)
  178. log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start),
  179. "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize)
  180. }
  181. // dereference is the private locked version of Dereference.
  182. func (db *Database) dereference(child common.Hash, parent common.Hash) {
  183. // Dereference the parent-child
  184. node := db.nodes[parent]
  185. node.children[child]--
  186. if node.children[child] == 0 {
  187. delete(node.children, child)
  188. }
  189. // If the node does not exist, it's a previously committed node.
  190. node, ok := db.nodes[child]
  191. if !ok {
  192. return
  193. }
  194. // If there are no more references to the child, delete it and cascade
  195. node.parents--
  196. if node.parents == 0 {
  197. for hash := range node.children {
  198. db.dereference(hash, child)
  199. }
  200. delete(db.nodes, child)
  201. db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
  202. }
  203. }
  204. // Commit iterates over all the children of a particular node, writes them out
  205. // to disk, forcefully tearing down all references in both directions.
  206. //
  207. // As a side effect, all pre-images accumulated up to this point are also written.
  208. func (db *Database) Commit(node common.Hash, report bool) error {
  209. // Create a database batch to flush persistent data out. It is important that
  210. // outside code doesn't see an inconsistent state (referenced data removed from
  211. // memory cache during commit but not yet in persistent storage). This is ensured
  212. // by only uncaching existing data when the database write finalizes.
  213. db.lock.RLock()
  214. start := time.Now()
  215. batch := db.diskdb.NewBatch()
  216. // Move all of the accumulated preimages into a write batch
  217. for hash, preimage := range db.preimages {
  218. if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
  219. log.Error("Failed to commit preimage from trie database", "err", err)
  220. db.lock.RUnlock()
  221. return err
  222. }
  223. if batch.ValueSize() > ethdb.IdealBatchSize {
  224. if err := batch.Write(); err != nil {
  225. return err
  226. }
  227. batch.Reset()
  228. }
  229. }
  230. // Move the trie itself into the batch, flushing if enough data is accumulated
  231. nodes, storage := len(db.nodes), db.nodesSize+db.preimagesSize
  232. if err := db.commit(node, batch); err != nil {
  233. log.Error("Failed to commit trie from trie database", "err", err)
  234. db.lock.RUnlock()
  235. return err
  236. }
  237. // Write batch ready, unlock for readers during persistence
  238. if err := batch.Write(); err != nil {
  239. log.Error("Failed to write trie to disk", "err", err)
  240. db.lock.RUnlock()
  241. return err
  242. }
  243. db.lock.RUnlock()
  244. // Write successful, clear out the flushed data
  245. db.lock.Lock()
  246. defer db.lock.Unlock()
  247. db.preimages = make(map[common.Hash][]byte)
  248. db.preimagesSize = 0
  249. db.uncache(node)
  250. logger := log.Info
  251. if !report {
  252. logger = log.Debug
  253. }
  254. logger("Persisted trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start),
  255. "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize)
  256. // Reset the garbage collection statistics
  257. db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
  258. return nil
  259. }
  260. // commit is the private locked version of Commit.
  261. func (db *Database) commit(hash common.Hash, batch ethdb.Batch) error {
  262. // If the node does not exist, it's a previously committed node
  263. node, ok := db.nodes[hash]
  264. if !ok {
  265. return nil
  266. }
  267. for child := range node.children {
  268. if err := db.commit(child, batch); err != nil {
  269. return err
  270. }
  271. }
  272. if err := batch.Put(hash[:], node.blob); err != nil {
  273. return err
  274. }
  275. // If we've reached an optimal match size, commit and start over
  276. if batch.ValueSize() >= ethdb.IdealBatchSize {
  277. if err := batch.Write(); err != nil {
  278. return err
  279. }
  280. batch.Reset()
  281. }
  282. return nil
  283. }
  284. // uncache is the post-processing step of a commit operation where the already
  285. // persisted trie is removed from the cache. The reason behind the two-phase
  286. // commit is to ensure consistent data availability while moving from memory
  287. // to disk.
  288. func (db *Database) uncache(hash common.Hash) {
  289. // If the node does not exist, we're done on this path
  290. node, ok := db.nodes[hash]
  291. if !ok {
  292. return
  293. }
  294. // Otherwise uncache the node's subtries and remove the node itself too
  295. for child := range node.children {
  296. db.uncache(child)
  297. }
  298. delete(db.nodes, hash)
  299. db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
  300. }
  301. // Size returns the current storage size of the memory cache in front of the
  302. // persistent database layer.
  303. func (db *Database) Size() common.StorageSize {
  304. db.lock.RLock()
  305. defer db.lock.RUnlock()
  306. return db.nodesSize + db.preimagesSize
  307. }