// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/prque"
	"github.com/ethereum/go-ethereum/ethdb"
)

// ErrNotRequested is returned by the trie sync when it's requested to process a
// node it did not request.
var ErrNotRequested = errors.New("not requested")

// ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
// node it already processed previously.
var ErrAlreadyProcessed = errors.New("already processed")

// request represents a scheduled or already in-flight state retrieval request.
type request struct {
	hash common.Hash // Hash of the node data content to retrieve
	data []byte      // Data content of the node, cached until all subtrees complete
	raw  bool        // Whether this is a raw entry (code) or a trie node

	parents []*request // Parent state nodes referencing this entry (notify all upon completion)
	depth   int        // Depth level within the trie the node is located to prioritise DFS
	deps    int        // Number of dependencies before allowed to commit this node

	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}

// SyncResult is a simple struct to return a missing node along with its
// request hash.
type SyncResult struct {
	Hash common.Hash // Hash of the originally unknown trie node
	Data []byte      // Data content of the retrieved node
}

// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
	batch map[common.Hash][]byte // In-memory membatch of recently completed items
	order []common.Hash          // Order of completion to prevent out-of-order data loss
}

// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
func newSyncMemBatch() *syncMemBatch {
	return &syncMemBatch{
		batch: make(map[common.Hash][]byte),
		order: make([]common.Hash, 0, 256),
	}
}

// Sync is the main state trie synchronisation scheduler, which provides yet
// unknown trie hashes to retrieve, accepts node data associated with said hashes
// and reconstructs the trie step by step until all is done.
type Sync struct {
	database ethdb.KeyValueReader     // Persistent database to check for existing entries
	membatch *syncMemBatch            // Memory buffer to avoid frequent database writes
	requests map[common.Hash]*request // Pending requests pertaining to a key hash
	queue    *prque.Prque             // Priority queue with the pending requests
	bloom    *SyncBloom               // Bloom filter for fast node existence checks
}

// NewSync creates a new trie data download scheduler.
func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, bloom *SyncBloom) *Sync {
	ts := &Sync{
		database: database,
		membatch: newSyncMemBatch(),
		requests: make(map[common.Hash]*request),
		queue:    prque.New(nil),
		bloom:    bloom,
	}
	ts.AddSubTrie(root, 0, common.Hash{}, callback)
	return ts
}

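// A minimal usage sketch for the scheduler (illustrative only): drain the
// missing-node queue, fetch the blobs over the network, feed them back via
// Process and flush completed items with Commit. The fetchNodes helper is
// hypothetical, standing in for whatever network retrieval the caller uses,
// and db is assumed to implement both ethdb.KeyValueReader and
// ethdb.KeyValueWriter.
//
//	sched := NewSync(root, db, nil, bloom)
//	for sched.Pending() > 0 {
//		results := fetchNodes(sched.Missing(128)) // hypothetical fetcher returning []SyncResult
//		if _, index, err := sched.Process(results); err != nil {
//			return fmt.Errorf("failed to process result #%d: %v", index, err)
//		}
//		if _, err := sched.Commit(db); err != nil {
//			return fmt.Errorf("failed to commit data: %v", err)
//		}
//	}
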
// AddSubTrie registers a new trie to the sync code, rooted at the designated parent.
func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callback LeafCallback) {
	// Short circuit if the trie is empty or already known
	if root == emptyRoot {
		return
	}
	if _, ok := s.membatch.batch[root]; ok {
		return
	}
	if s.bloom.Contains(root[:]) {
		// Bloom filter says this might be a duplicate, double check
		blob, _ := s.database.Get(root[:])
		if local, err := decodeNode(root[:], blob); local != nil && err == nil {
			return
		}
		// False positive, bump fault meter
		bloomFaultMeter.Mark(1)
	}
	// Assemble the new sub-trie sync request
	req := &request{
		hash:     root,
		depth:    depth,
		callback: callback,
	}
	// If this sub-trie has a designated parent, link them together
	if parent != (common.Hash{}) {
		ancestor := s.requests[parent]
		if ancestor == nil {
			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
		}
		ancestor.deps++
		req.parents = append(req.parents, ancestor)
	}
	s.schedule(req)
}

// AddRawEntry schedules the direct retrieval of a state entry that should not be
// interpreted as a trie node, but rather accepted and stored into the database
// as is. This method's goal is to support misc state metadata retrievals (e.g.
// contract code).
func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) {
	// Short circuit if the entry is empty or already known
	if hash == emptyState {
		return
	}
	if _, ok := s.membatch.batch[hash]; ok {
		return
	}
	if s.bloom.Contains(hash[:]) {
		// Bloom filter says this might be a duplicate, double check
		if ok, _ := s.database.Has(hash[:]); ok {
			return
		}
		// False positive, bump fault meter
		bloomFaultMeter.Mark(1)
	}
	// Assemble the new raw-entry sync request
	req := &request{
		hash:  hash,
		raw:   true,
		depth: depth,
	}
	// If this raw entry has a designated parent, link them together
	if parent != (common.Hash{}) {
		ancestor := s.requests[parent]
		if ancestor == nil {
			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
		}
		ancestor.deps++
		req.parents = append(req.parents, ancestor)
	}
	s.schedule(req)
}

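// A sketch of how AddSubTrie and AddRawEntry compose in a full state sync
// (illustrative only): the leaf callback handed to NewSync can expand each
// account leaf into its storage trie and contract code. The account struct and
// its RLP decoding below are assumptions standing in for the caller's actual
// account encoding; syncer refers to the enclosing *Sync, and 64 is the nibble
// depth of a full 32-byte account path.
//
//	callback := func(leaf []byte, parent common.Hash) error {
//		var obj account // hypothetical struct with Root and CodeHash fields
//		if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil {
//			return err
//		}
//		syncer.AddSubTrie(obj.Root, 64, parent, nil)
//		syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent)
//		return nil
//	}
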
// Missing retrieves the known missing nodes from the trie for retrieval. If max
// is zero, all currently queued requests are returned.
func (s *Sync) Missing(max int) []common.Hash {
	var requests []common.Hash
	for !s.queue.Empty() && (max == 0 || len(requests) < max) {
		requests = append(requests, s.queue.PopItem().(common.Hash))
	}
	return requests
}

// Process injects a batch of retrieved trie node data, returning whether
// anything was committed to the internal membatch and, on failure, the index
// of the entry that could not be processed.
func (s *Sync) Process(results []SyncResult) (bool, int, error) {
	committed := false

	for i, item := range results {
		// If the item was not requested, bail out
		request := s.requests[item.Hash]
		if request == nil {
			return committed, i, ErrNotRequested
		}
		if request.data != nil {
			return committed, i, ErrAlreadyProcessed
		}
		// If the item is a raw entry request, commit directly
		if request.raw {
			request.data = item.Data
			s.commit(request)
			committed = true
			continue
		}
		// Decode the node data content and update the request
		node, err := decodeNode(item.Hash[:], item.Data)
		if err != nil {
			return committed, i, err
		}
		request.data = item.Data

		// Create and schedule a request for all the children nodes
		requests, err := s.children(request, node)
		if err != nil {
			return committed, i, err
		}
		if len(requests) == 0 && request.deps == 0 {
			s.commit(request)
			committed = true
			continue
		}
		request.deps += len(requests)
		for _, child := range requests {
			s.schedule(child)
		}
	}
	return committed, 0, nil
}

// Commit flushes the data stored in the internal membatch out to persistent
// storage, returning the number of items written and any error that occurred.
func (s *Sync) Commit(dbw ethdb.KeyValueWriter) (int, error) {
	// Dump the membatch into the database writer dbw
	for i, key := range s.membatch.order {
		if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil {
			return i, err
		}
		s.bloom.Add(key[:])
	}
	written := len(s.membatch.order) // TODO(karalabe): could an order change improve write performance?

	// Drop the membatch data and return
	s.membatch = newSyncMemBatch()
	return written, nil
}

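// Callers that want buffered or atomic persistence can hand Commit an ethdb
// batch instead of the raw database (a sketch, assuming db also implements
// ethdb.Batcher):
//
//	batch := db.NewBatch()
//	if _, err := sched.Commit(batch); err != nil {
//		return err
//	}
//	if err := batch.Write(); err != nil {
//		return err
//	}
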
// Pending returns the number of state entries currently pending for download.
func (s *Sync) Pending() int {
	return len(s.requests)
}

// schedule inserts a new state retrieval request into the fetch queue. If there
// is already a pending request for this node, the new request will be discarded
// and only a parent reference added to the old one.
func (s *Sync) schedule(req *request) {
	// If we're already requesting this node, add a new reference and stop
	if old, ok := s.requests[req.hash]; ok {
		old.parents = append(old.parents, req.parents...)
		return
	}
	// Schedule the request for future retrieval, using the depth as the
	// priority so deeper nodes are fetched first (roughly depth-first)
	s.queue.Push(req.hash, int64(req.depth))
	s.requests[req.hash] = req
}

// children retrieves all the missing children of a state trie entry for future
// retrieval scheduling.
func (s *Sync) children(req *request, object node) ([]*request, error) {
	// Gather all the children of the node, irrelevant whether known or not
	type child struct {
		node  node
		depth int
	}
	var children []child

	switch node := (object).(type) {
	case *shortNode:
		children = []child{{
			node:  node.Val,
			depth: req.depth + len(node.Key),
		}}
	case *fullNode:
		for i := 0; i < 17; i++ { // 16 branch slots plus the value slot
			if node.Children[i] != nil {
				children = append(children, child{
					node:  node.Children[i],
					depth: req.depth + 1,
				})
			}
		}
	default:
		panic(fmt.Sprintf("unknown node: %+v", node))
	}
	// Iterate over the children, and request all unknown ones
	requests := make([]*request, 0, len(children))
	for _, child := range children {
		// Notify any external watcher of a new key/value node
		if req.callback != nil {
			if node, ok := (child.node).(valueNode); ok {
				if err := req.callback(node, req.hash); err != nil {
					return nil, err
				}
			}
		}
		// If the child references another node, resolve or schedule
		if node, ok := (child.node).(hashNode); ok {
			// Try to resolve the node from the local database
			hash := common.BytesToHash(node)
			if _, ok := s.membatch.batch[hash]; ok {
				continue
			}
			if s.bloom.Contains(node) {
				// Bloom filter says this might be a duplicate, double check
				if ok, _ := s.database.Has(node); ok {
					continue
				}
				// False positive, bump fault meter
				bloomFaultMeter.Mark(1)
			}
			// Locally unknown node, schedule for retrieval
			requests = append(requests, &request{
				hash:     hash,
				parents:  []*request{req},
				depth:    child.depth,
				callback: req.callback,
			})
		}
	}
	return requests, nil
}

// commit finalizes a retrieval request and stores it into the membatch. If any
// of the referencing parent requests complete due to this commit, they are also
// committed themselves.
func (s *Sync) commit(req *request) (err error) {
	// Write the node content to the membatch
	s.membatch.batch[req.hash] = req.data
	s.membatch.order = append(s.membatch.order, req.hash)

	delete(s.requests, req.hash)

	// Check all parents for completion
	for _, parent := range req.parents {
		parent.deps--
		if parent.deps == 0 {
			if err := s.commit(parent); err != nil {
				return err
			}
		}
	}
	return nil
}