// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package trie implements Merkle Patricia Tries.
  17. package trie
  18. import (
  19. "bytes"
  20. "errors"
  21. "fmt"
  22. "hash"
  23. "github.com/ethereum/go-ethereum/common"
  24. "github.com/ethereum/go-ethereum/crypto/sha3"
  25. "github.com/ethereum/go-ethereum/logger"
  26. "github.com/ethereum/go-ethereum/logger/glog"
  27. "github.com/ethereum/go-ethereum/rlp"
  28. )
// defaultCacheCapacity is the capacity handed to newARC when the
// package-wide node cache is created.
const defaultCacheCapacity = 800

var (
	// The global cache stores decoded trie nodes by hash as they get loaded.
	// It is shared by every Trie in the process.
	globalCache = newARC(defaultCacheCapacity)
	// This is the known root hash of an empty trie.
	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
)

// ErrMissingRoot is returned by New when the requested root node has no
// entry in the database.
var ErrMissingRoot = errors.New("missing root node")
// Database must be implemented by backing stores for the trie.
// It combines the write side (DatabaseWriter) with read access.
type Database interface {
	DatabaseWriter
	// Get returns the value for key from the database.
	Get(key []byte) (value []byte, err error)
}
// DatabaseWriter wraps the Put method of a backing store for the trie.
// It is the only capability needed to commit a trie.
type DatabaseWriter interface {
	// Put stores the mapping key->value in the database.
	// Implementations must not hold onto the value bytes, the trie
	// will reuse the slice across calls to Put.
	Put(key, value []byte) error
}
// Trie is a Merkle Patricia Trie.
// The zero value is an empty trie with no database.
// Use New to create a trie that sits on top of a database.
//
// Trie is not safe for concurrent use.
type Trie struct {
	// root is the top node of the trie; nil means the trie is empty.
	root node
	// db is the backing store that hash nodes are loaded from.
	db Database
	// hasher is created lazily the first time the root hash is computed.
	*hasher
}
  60. // New creates a trie with an existing root node from db.
  61. //
  62. // If root is the zero hash or the sha3 hash of an empty string, the
  63. // trie is initially empty and does not require a database. Otherwise,
  64. // New will panics if db is nil or root does not exist in the
  65. // database. Accessing the trie loads nodes from db on demand.
  66. func New(root common.Hash, db Database) (*Trie, error) {
  67. trie := &Trie{db: db}
  68. if (root != common.Hash{}) && root != emptyRoot {
  69. if db == nil {
  70. panic("trie.New: cannot use existing root without a database")
  71. }
  72. if v, _ := trie.db.Get(root[:]); len(v) == 0 {
  73. return nil, ErrMissingRoot
  74. }
  75. trie.root = hashNode(root.Bytes())
  76. }
  77. return trie, nil
  78. }
  79. // Iterator returns an iterator over all mappings in the trie.
  80. func (t *Trie) Iterator() *Iterator {
  81. return NewIterator(t)
  82. }
  83. // Get returns the value for key stored in the trie.
  84. // The value bytes must not be modified by the caller.
  85. func (t *Trie) Get(key []byte) []byte {
  86. key = compactHexDecode(key)
  87. tn := t.root
  88. for len(key) > 0 {
  89. switch n := tn.(type) {
  90. case shortNode:
  91. if len(key) < len(n.Key) || !bytes.Equal(n.Key, key[:len(n.Key)]) {
  92. return nil
  93. }
  94. tn = n.Val
  95. key = key[len(n.Key):]
  96. case fullNode:
  97. tn = n[key[0]]
  98. key = key[1:]
  99. case nil:
  100. return nil
  101. case hashNode:
  102. tn = t.resolveHash(n)
  103. default:
  104. panic(fmt.Sprintf("%T: invalid node: %v", tn, tn))
  105. }
  106. }
  107. return tn.(valueNode)
  108. }
  109. // Update associates key with value in the trie. Subsequent calls to
  110. // Get will return value. If value has length zero, any existing value
  111. // is deleted from the trie and calls to Get will return nil.
  112. //
  113. // The value bytes must not be modified by the caller while they are
  114. // stored in the trie.
  115. func (t *Trie) Update(key, value []byte) {
  116. k := compactHexDecode(key)
  117. if len(value) != 0 {
  118. t.root = t.insert(t.root, k, valueNode(value))
  119. } else {
  120. t.root = t.delete(t.root, k)
  121. }
  122. }
  123. func (t *Trie) insert(n node, key []byte, value node) node {
  124. if len(key) == 0 {
  125. return value
  126. }
  127. switch n := n.(type) {
  128. case shortNode:
  129. matchlen := prefixLen(key, n.Key)
  130. // If the whole key matches, keep this short node as is
  131. // and only update the value.
  132. if matchlen == len(n.Key) {
  133. return shortNode{n.Key, t.insert(n.Val, key[matchlen:], value)}
  134. }
  135. // Otherwise branch out at the index where they differ.
  136. var branch fullNode
  137. branch[n.Key[matchlen]] = t.insert(nil, n.Key[matchlen+1:], n.Val)
  138. branch[key[matchlen]] = t.insert(nil, key[matchlen+1:], value)
  139. // Replace this shortNode with the branch if it occurs at index 0.
  140. if matchlen == 0 {
  141. return branch
  142. }
  143. // Otherwise, replace it with a short node leading up to the branch.
  144. return shortNode{key[:matchlen], branch}
  145. case fullNode:
  146. n[key[0]] = t.insert(n[key[0]], key[1:], value)
  147. return n
  148. case nil:
  149. return shortNode{key, value}
  150. case hashNode:
  151. // We've hit a part of the trie that isn't loaded yet. Load
  152. // the node and insert into it. This leaves all child nodes on
  153. // the path to the value in the trie.
  154. //
  155. // TODO: track whether insertion changed the value and keep
  156. // n as a hash node if it didn't.
  157. return t.insert(t.resolveHash(n), key, value)
  158. default:
  159. panic(fmt.Sprintf("%T: invalid node: %v", n, n))
  160. }
  161. }
  162. // Delete removes any existing value for key from the trie.
  163. func (t *Trie) Delete(key []byte) {
  164. k := compactHexDecode(key)
  165. t.root = t.delete(t.root, k)
  166. }
  167. // delete returns the new root of the trie with key deleted.
  168. // It reduces the trie to minimal form by simplifying
  169. // nodes on the way up after deleting recursively.
  170. func (t *Trie) delete(n node, key []byte) node {
  171. switch n := n.(type) {
  172. case shortNode:
  173. matchlen := prefixLen(key, n.Key)
  174. if matchlen < len(n.Key) {
  175. return n // don't replace n on mismatch
  176. }
  177. if matchlen == len(key) {
  178. return nil // remove n entirely for whole matches
  179. }
  180. // The key is longer than n.Key. Remove the remaining suffix
  181. // from the subtrie. Child can never be nil here since the
  182. // subtrie must contain at least two other values with keys
  183. // longer than n.Key.
  184. child := t.delete(n.Val, key[len(n.Key):])
  185. switch child := child.(type) {
  186. case shortNode:
  187. // Deleting from the subtrie reduced it to another
  188. // short node. Merge the nodes to avoid creating a
  189. // shortNode{..., shortNode{...}}. Use concat (which
  190. // always creates a new slice) instead of append to
  191. // avoid modifying n.Key since it might be shared with
  192. // other nodes.
  193. return shortNode{concat(n.Key, child.Key...), child.Val}
  194. default:
  195. return shortNode{n.Key, child}
  196. }
  197. case fullNode:
  198. n[key[0]] = t.delete(n[key[0]], key[1:])
  199. // Check how many non-nil entries are left after deleting and
  200. // reduce the full node to a short node if only one entry is
  201. // left. Since n must've contained at least two children
  202. // before deletion (otherwise it would not be a full node) n
  203. // can never be reduced to nil.
  204. //
  205. // When the loop is done, pos contains the index of the single
  206. // value that is left in n or -2 if n contains at least two
  207. // values.
  208. pos := -1
  209. for i, cld := range n {
  210. if cld != nil {
  211. if pos == -1 {
  212. pos = i
  213. } else {
  214. pos = -2
  215. break
  216. }
  217. }
  218. }
  219. if pos >= 0 {
  220. if pos != 16 {
  221. // If the remaining entry is a short node, it replaces
  222. // n and its key gets the missing nibble tacked to the
  223. // front. This avoids creating an invalid
  224. // shortNode{..., shortNode{...}}. Since the entry
  225. // might not be loaded yet, resolve it just for this
  226. // check.
  227. cnode := t.resolve(n[pos])
  228. if cnode, ok := cnode.(shortNode); ok {
  229. k := append([]byte{byte(pos)}, cnode.Key...)
  230. return shortNode{k, cnode.Val}
  231. }
  232. }
  233. // Otherwise, n is replaced by a one-nibble short node
  234. // containing the child.
  235. return shortNode{[]byte{byte(pos)}, n[pos]}
  236. }
  237. // n still contains at least two values and cannot be reduced.
  238. return n
  239. case nil:
  240. return nil
  241. case hashNode:
  242. // We've hit a part of the trie that isn't loaded yet. Load
  243. // the node and delete from it. This leaves all child nodes on
  244. // the path to the value in the trie.
  245. //
  246. // TODO: track whether deletion actually hit a key and keep
  247. // n as a hash node if it didn't.
  248. return t.delete(t.resolveHash(n), key)
  249. default:
  250. panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
  251. }
  252. }
  253. func concat(s1 []byte, s2 ...byte) []byte {
  254. r := make([]byte, len(s1)+len(s2))
  255. copy(r, s1)
  256. copy(r[len(s1):], s2)
  257. return r
  258. }
  259. func (t *Trie) resolve(n node) node {
  260. if n, ok := n.(hashNode); ok {
  261. return t.resolveHash(n)
  262. }
  263. return n
  264. }
  265. func (t *Trie) resolveHash(n hashNode) node {
  266. if v, ok := globalCache.Get(n); ok {
  267. return v
  268. }
  269. enc, err := t.db.Get(n)
  270. if err != nil || enc == nil {
  271. // TODO: This needs to be improved to properly distinguish errors.
  272. // Disk I/O errors shouldn't produce nil (and cause a
  273. // consensus failure or weird crash), but it is unclear how
  274. // they could be handled because the entire stack above the trie isn't
  275. // prepared to cope with missing state nodes.
  276. if glog.V(logger.Error) {
  277. glog.Errorf("Dangling hash node ref %x: %v", n, err)
  278. }
  279. return nil
  280. }
  281. dec := mustDecodeNode(n, enc)
  282. if dec != nil {
  283. globalCache.Put(n, dec)
  284. }
  285. return dec
  286. }
  287. // Root returns the root hash of the trie.
  288. // Deprecated: use Hash instead.
  289. func (t *Trie) Root() []byte { return t.Hash().Bytes() }
  290. // Hash returns the root hash of the trie. It does not write to the
  291. // database and can be used even if the trie doesn't have one.
  292. func (t *Trie) Hash() common.Hash {
  293. root, _ := t.hashRoot(nil)
  294. return common.BytesToHash(root.(hashNode))
  295. }
  296. // Commit writes all nodes to the trie's database.
  297. // Nodes are stored with their sha3 hash as the key.
  298. //
  299. // Committing flushes nodes from memory.
  300. // Subsequent Get calls will load nodes from the database.
  301. func (t *Trie) Commit() (root common.Hash, err error) {
  302. if t.db == nil {
  303. panic("Commit called on trie with nil database")
  304. }
  305. return t.CommitTo(t.db)
  306. }
  307. // CommitTo writes all nodes to the given database.
  308. // Nodes are stored with their sha3 hash as the key.
  309. //
  310. // Committing flushes nodes from memory. Subsequent Get calls will
  311. // load nodes from the trie's database. Calling code must ensure that
  312. // the changes made to db are written back to the trie's attached
  313. // database before using the trie.
  314. func (t *Trie) CommitTo(db DatabaseWriter) (root common.Hash, err error) {
  315. n, err := t.hashRoot(db)
  316. if err != nil {
  317. return (common.Hash{}), err
  318. }
  319. t.root = n
  320. return common.BytesToHash(n.(hashNode)), nil
  321. }
  322. func (t *Trie) hashRoot(db DatabaseWriter) (node, error) {
  323. if t.root == nil {
  324. return hashNode(emptyRoot.Bytes()), nil
  325. }
  326. if t.hasher == nil {
  327. t.hasher = newHasher()
  328. }
  329. return t.hasher.hash(t.root, db, true)
  330. }
// hasher holds the scratch state that is reused while hashing and
// storing trie nodes.
type hasher struct {
	// tmp receives the RLP encoding of the node currently being stored.
	tmp *bytes.Buffer
	// sha computes the hash of the encoded node.
	sha hash.Hash
}
  335. func newHasher() *hasher {
  336. return &hasher{tmp: new(bytes.Buffer), sha: sha3.NewKeccak256()}
  337. }
  338. func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, error) {
  339. hashed, err := h.replaceChildren(n, db)
  340. if err != nil {
  341. return hashNode{}, err
  342. }
  343. if n, err = h.store(hashed, db, force); err != nil {
  344. return hashNode{}, err
  345. }
  346. return n, nil
  347. }
  348. // hashChildren replaces child nodes of n with their hashes if the encoded
  349. // size of the child is larger than a hash.
  350. func (h *hasher) replaceChildren(n node, db DatabaseWriter) (node, error) {
  351. var err error
  352. switch n := n.(type) {
  353. case shortNode:
  354. n.Key = compactEncode(n.Key)
  355. if _, ok := n.Val.(valueNode); !ok {
  356. if n.Val, err = h.hash(n.Val, db, false); err != nil {
  357. return n, err
  358. }
  359. }
  360. if n.Val == nil {
  361. // Ensure that nil children are encoded as empty strings.
  362. n.Val = valueNode(nil)
  363. }
  364. return n, nil
  365. case fullNode:
  366. for i := 0; i < 16; i++ {
  367. if n[i] != nil {
  368. if n[i], err = h.hash(n[i], db, false); err != nil {
  369. return n, err
  370. }
  371. } else {
  372. // Ensure that nil children are encoded as empty strings.
  373. n[i] = valueNode(nil)
  374. }
  375. }
  376. if n[16] == nil {
  377. n[16] = valueNode(nil)
  378. }
  379. return n, nil
  380. default:
  381. return n, nil
  382. }
  383. }
  384. func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) {
  385. // Don't store hashes or empty nodes.
  386. if _, isHash := n.(hashNode); n == nil || isHash {
  387. return n, nil
  388. }
  389. h.tmp.Reset()
  390. if err := rlp.Encode(h.tmp, n); err != nil {
  391. panic("encode error: " + err.Error())
  392. }
  393. if h.tmp.Len() < 32 && !force {
  394. // Nodes smaller than 32 bytes are stored inside their parent.
  395. return n, nil
  396. }
  397. // Larger nodes are replaced by their hash and stored in the database.
  398. h.sha.Reset()
  399. h.sha.Write(h.tmp.Bytes())
  400. key := hashNode(h.sha.Sum(nil))
  401. if db != nil {
  402. err := db.Put(key, h.tmp.Bytes())
  403. return key, err
  404. }
  405. return key, nil
  406. }