sync_bloom.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // Copyright 2019 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package trie
  17. import (
  18. "encoding/binary"
  19. "fmt"
  20. "sync"
  21. "sync/atomic"
  22. "time"
  23. "github.com/ethereum/go-ethereum/common"
  24. "github.com/ethereum/go-ethereum/common/gopool"
  25. "github.com/ethereum/go-ethereum/core/rawdb"
  26. "github.com/ethereum/go-ethereum/ethdb"
  27. "github.com/ethereum/go-ethereum/log"
  28. "github.com/ethereum/go-ethereum/metrics"
  29. bloomfilter "github.com/holiman/bloomfilter/v2"
  30. )
  31. var (
  32. bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil)
  33. bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil)
  34. bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil)
  35. bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
  36. bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
  37. bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
  38. )
  39. // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
  40. // node or contract code already exists on disk or not. It self populates from the
  41. // provided disk database on creation in a background thread and will only start
  42. // returning live results once that's finished.
  43. type SyncBloom struct {
  44. bloom *bloomfilter.Filter
  45. inited uint32
  46. closer sync.Once
  47. closed uint32
  48. pend sync.WaitGroup
  49. }
  50. // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
  51. // initializes it from the database. The bloom is hard coded to use 3 filters.
  52. func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
  53. // Create the bloom filter to track known trie nodes
  54. bloom, err := bloomfilter.New(memory*1024*1024*8, 4)
  55. if err != nil {
  56. panic(fmt.Sprintf("failed to create bloom: %v", err))
  57. }
  58. log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
  59. // Assemble the fast sync bloom and init it from previous sessions
  60. b := &SyncBloom{
  61. bloom: bloom,
  62. }
  63. b.pend.Add(2)
  64. gopool.Submit(func() {
  65. defer b.pend.Done()
  66. b.init(database)
  67. })
  68. gopool.Submit(func() {
  69. defer b.pend.Done()
  70. b.meter()
  71. })
  72. return b
  73. }
  74. // init iterates over the database, pushing every trie hash into the bloom filter.
  75. func (b *SyncBloom) init(database ethdb.Iteratee) {
  76. // Iterate over the database, but restart every now and again to avoid holding
  77. // a persistent snapshot since fast sync can push a ton of data concurrently,
  78. // bloating the disk.
  79. //
  80. // Note, this is fine, because everything inserted into leveldb by fast sync is
  81. // also pushed into the bloom directly, so we're not missing anything when the
  82. // iterator is swapped out for a new one.
  83. it := database.NewIterator(nil, nil)
  84. var (
  85. start = time.Now()
  86. swap = time.Now()
  87. )
  88. for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
  89. // If the database entry is a trie node, add it to the bloom
  90. key := it.Key()
  91. if len(key) == common.HashLength {
  92. b.bloom.AddHash(binary.BigEndian.Uint64(key))
  93. bloomLoadMeter.Mark(1)
  94. } else if ok, hash := rawdb.IsCodeKey(key); ok {
  95. // If the database entry is a contract code, add it to the bloom
  96. b.bloom.AddHash(binary.BigEndian.Uint64(hash))
  97. bloomLoadMeter.Mark(1)
  98. }
  99. // If enough time elapsed since the last iterator swap, restart
  100. if time.Since(swap) > 8*time.Second {
  101. key := common.CopyBytes(it.Key())
  102. it.Release()
  103. it = database.NewIterator(nil, key)
  104. log.Info("Initializing state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
  105. swap = time.Now()
  106. }
  107. }
  108. it.Release()
  109. // Mark the bloom filter inited and return
  110. log.Info("Initialized state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
  111. atomic.StoreUint32(&b.inited, 1)
  112. }
  113. // meter periodically recalculates the false positive error rate of the bloom
  114. // filter and reports it in a metric.
  115. func (b *SyncBloom) meter() {
  116. for {
  117. // Report the current error ration. No floats, lame, scale it up.
  118. bloomErrorGauge.Update(int64(b.bloom.FalsePosititveProbability() * 100000))
  119. // Wait one second, but check termination more frequently
  120. for i := 0; i < 10; i++ {
  121. if atomic.LoadUint32(&b.closed) == 1 {
  122. return
  123. }
  124. time.Sleep(100 * time.Millisecond)
  125. }
  126. }
  127. }
  128. // Close terminates any background initializer still running and releases all the
  129. // memory allocated for the bloom.
  130. func (b *SyncBloom) Close() error {
  131. b.closer.Do(func() {
  132. // Ensure the initializer is stopped
  133. atomic.StoreUint32(&b.closed, 1)
  134. b.pend.Wait()
  135. // Wipe the bloom, but mark it "uninited" just in case someone attempts an access
  136. log.Info("Deallocated state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability())
  137. atomic.StoreUint32(&b.inited, 0)
  138. b.bloom = nil
  139. })
  140. return nil
  141. }
  142. // Add inserts a new trie node hash into the bloom filter.
  143. func (b *SyncBloom) Add(hash []byte) {
  144. if atomic.LoadUint32(&b.closed) == 1 {
  145. return
  146. }
  147. b.bloom.AddHash(binary.BigEndian.Uint64(hash))
  148. bloomAddMeter.Mark(1)
  149. }
  150. // Contains tests if the bloom filter contains the given hash:
  151. // - false: the bloom definitely does not contain hash
  152. // - true: the bloom maybe contains hash
  153. //
  154. // While the bloom is being initialized, any query will return true.
  155. func (b *SyncBloom) Contains(hash []byte) bool {
  156. bloomTestMeter.Mark(1)
  157. if atomic.LoadUint32(&b.inited) == 0 {
  158. // We didn't load all the trie nodes from the previous run of Geth yet. As
  159. // such, we can't say for sure if a hash is not present for anything. Until
  160. // the init is done, we're faking "possible presence" for everything.
  161. return true
  162. }
  163. // Bloom initialized, check the real one and report any successful misses
  164. maybe := b.bloom.ContainsHash(binary.BigEndian.Uint64(hash))
  165. if !maybe {
  166. bloomMissMeter.Mark(1)
  167. }
  168. return maybe
  169. }