leveldb.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. // Copyright 2018 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. // +build !js
  17. // Package leveldb implements the key-value database layer based on LevelDB.
  18. package leveldb
  19. import (
  20. "fmt"
  21. "strconv"
  22. "strings"
  23. "sync"
  24. "time"
  25. "github.com/ethereum/go-ethereum/common"
  26. "github.com/ethereum/go-ethereum/ethdb"
  27. "github.com/ethereum/go-ethereum/log"
  28. "github.com/ethereum/go-ethereum/metrics"
  29. "github.com/syndtr/goleveldb/leveldb"
  30. "github.com/syndtr/goleveldb/leveldb/errors"
  31. "github.com/syndtr/goleveldb/leveldb/filter"
  32. "github.com/syndtr/goleveldb/leveldb/opt"
  33. "github.com/syndtr/goleveldb/leveldb/util"
  34. )
  35. const (
  36. // degradationWarnInterval specifies how often warning should be printed if the
  37. // leveldb database cannot keep up with requested writes.
  38. degradationWarnInterval = time.Minute
  39. // minCache is the minimum amount of memory in megabytes to allocate to leveldb
  40. // read and write caching, split half and half.
  41. minCache = 16
  42. // minHandles is the minimum number of files handles to allocate to the open
  43. // database files.
  44. minHandles = 16
  45. // metricsGatheringInterval specifies the interval to retrieve leveldb database
  46. // compaction, io and pause stats to report to the user.
  47. metricsGatheringInterval = 3 * time.Second
  48. )
  49. // Database is a persistent key-value store. Apart from basic data storage
  50. // functionality it also supports batch writes and iterating over the keyspace in
  51. // binary-alphabetical order.
  52. type Database struct {
  53. fn string // filename for reporting
  54. db *leveldb.DB // LevelDB instance
  55. compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
  56. compReadMeter metrics.Meter // Meter for measuring the data read during compaction
  57. compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
  58. writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
  59. writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
  60. diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database
  61. diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
  62. diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
  63. memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction
  64. level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0
  65. nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
  66. seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
  67. quitLock sync.Mutex // Mutex protecting the quit channel access
  68. quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
  69. log log.Logger // Contextual logger tracking the database path
  70. }
  71. // New returns a wrapped LevelDB object. The namespace is the prefix that the
  72. // metrics reporting should use for surfacing internal stats.
  73. func New(file string, cache int, handles int, namespace string) (*Database, error) {
  74. // Ensure we have some minimal caching and file guarantees
  75. if cache < minCache {
  76. cache = minCache
  77. }
  78. if handles < minHandles {
  79. handles = minHandles
  80. }
  81. logger := log.New("database", file)
  82. logger.Info("Allocated cache and file handles", "cache", common.StorageSize(cache*1024*1024), "handles", handles)
  83. // Open the db and recover any potential corruptions
  84. db, err := leveldb.OpenFile(file, &opt.Options{
  85. OpenFilesCacheCapacity: handles,
  86. BlockCacheCapacity: cache / 2 * opt.MiB,
  87. WriteBuffer: cache / 4 * opt.MiB, // Two of these are used internally
  88. Filter: filter.NewBloomFilter(10),
  89. DisableSeeksCompaction: true,
  90. })
  91. if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
  92. db, err = leveldb.RecoverFile(file, nil)
  93. }
  94. if err != nil {
  95. return nil, err
  96. }
  97. // Assemble the wrapper with all the registered metrics
  98. ldb := &Database{
  99. fn: file,
  100. db: db,
  101. log: logger,
  102. quitChan: make(chan chan error),
  103. }
  104. ldb.compTimeMeter = metrics.NewRegisteredMeter(namespace+"compact/time", nil)
  105. ldb.compReadMeter = metrics.NewRegisteredMeter(namespace+"compact/input", nil)
  106. ldb.compWriteMeter = metrics.NewRegisteredMeter(namespace+"compact/output", nil)
  107. ldb.diskSizeGauge = metrics.NewRegisteredGauge(namespace+"disk/size", nil)
  108. ldb.diskReadMeter = metrics.NewRegisteredMeter(namespace+"disk/read", nil)
  109. ldb.diskWriteMeter = metrics.NewRegisteredMeter(namespace+"disk/write", nil)
  110. ldb.writeDelayMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/duration", nil)
  111. ldb.writeDelayNMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/counter", nil)
  112. ldb.memCompGauge = metrics.NewRegisteredGauge(namespace+"compact/memory", nil)
  113. ldb.level0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/level0", nil)
  114. ldb.nonlevel0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/nonlevel0", nil)
  115. ldb.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil)
  116. // Start up the metrics gathering and return
  117. go ldb.meter(metricsGatheringInterval)
  118. return ldb, nil
  119. }
  120. // Close stops the metrics collection, flushes any pending data to disk and closes
  121. // all io accesses to the underlying key-value store.
  122. func (db *Database) Close() error {
  123. db.quitLock.Lock()
  124. defer db.quitLock.Unlock()
  125. if db.quitChan != nil {
  126. errc := make(chan error)
  127. db.quitChan <- errc
  128. if err := <-errc; err != nil {
  129. db.log.Error("Metrics collection failed", "err", err)
  130. }
  131. db.quitChan = nil
  132. }
  133. return db.db.Close()
  134. }
  135. // Has retrieves if a key is present in the key-value store.
  136. func (db *Database) Has(key []byte) (bool, error) {
  137. return db.db.Has(key, nil)
  138. }
  139. // Get retrieves the given key if it's present in the key-value store.
  140. func (db *Database) Get(key []byte) ([]byte, error) {
  141. dat, err := db.db.Get(key, nil)
  142. if err != nil {
  143. return nil, err
  144. }
  145. return dat, nil
  146. }
  147. // Put inserts the given value into the key-value store.
  148. func (db *Database) Put(key []byte, value []byte) error {
  149. return db.db.Put(key, value, nil)
  150. }
  151. // Delete removes the key from the key-value store.
  152. func (db *Database) Delete(key []byte) error {
  153. return db.db.Delete(key, nil)
  154. }
  155. // NewBatch creates a write-only key-value store that buffers changes to its host
  156. // database until a final write is called.
  157. func (db *Database) NewBatch() ethdb.Batch {
  158. return &batch{
  159. db: db.db,
  160. b: new(leveldb.Batch),
  161. }
  162. }
  163. // NewIterator creates a binary-alphabetical iterator over a subset
  164. // of database content with a particular key prefix, starting at a particular
  165. // initial key (or after, if it does not exist).
  166. func (db *Database) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
  167. return db.db.NewIterator(bytesPrefixRange(prefix, start), nil)
  168. }
  169. // Stat returns a particular internal stat of the database.
  170. func (db *Database) Stat(property string) (string, error) {
  171. return db.db.GetProperty(property)
  172. }
  173. // Compact flattens the underlying data store for the given key range. In essence,
  174. // deleted and overwritten versions are discarded, and the data is rearranged to
  175. // reduce the cost of operations needed to access them.
  176. //
  177. // A nil start is treated as a key before all keys in the data store; a nil limit
  178. // is treated as a key after all keys in the data store. If both is nil then it
  179. // will compact entire data store.
  180. func (db *Database) Compact(start []byte, limit []byte) error {
  181. return db.db.CompactRange(util.Range{Start: start, Limit: limit})
  182. }
  183. // Path returns the path to the database directory.
  184. func (db *Database) Path() string {
  185. return db.fn
  186. }
  187. // meter periodically retrieves internal leveldb counters and reports them to
  188. // the metrics subsystem.
  189. //
  190. // This is how a LevelDB stats table looks like (currently):
  191. // Compactions
  192. // Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)
  193. // -------+------------+---------------+---------------+---------------+---------------
  194. // 0 | 0 | 0.00000 | 1.27969 | 0.00000 | 12.31098
  195. // 1 | 85 | 109.27913 | 28.09293 | 213.92493 | 214.26294
  196. // 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
  197. // 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
  198. //
  199. // This is how the write delay look like (currently):
  200. // DelayN:5 Delay:406.604657ms Paused: false
  201. //
  202. // This is how the iostats look like (currently):
  203. // Read(MB):3895.04860 Write(MB):3654.64712
  204. func (db *Database) meter(refresh time.Duration) {
  205. // Create the counters to store current and previous compaction values
  206. compactions := make([][]float64, 2)
  207. for i := 0; i < 2; i++ {
  208. compactions[i] = make([]float64, 4)
  209. }
  210. // Create storage for iostats.
  211. var iostats [2]float64
  212. // Create storage and warning log tracer for write delay.
  213. var (
  214. delaystats [2]int64
  215. lastWritePaused time.Time
  216. )
  217. var (
  218. errc chan error
  219. merr error
  220. )
  221. timer := time.NewTimer(refresh)
  222. defer timer.Stop()
  223. // Iterate ad infinitum and collect the stats
  224. for i := 1; errc == nil && merr == nil; i++ {
  225. // Retrieve the database stats
  226. stats, err := db.db.GetProperty("leveldb.stats")
  227. if err != nil {
  228. db.log.Error("Failed to read database stats", "err", err)
  229. merr = err
  230. continue
  231. }
  232. // Find the compaction table, skip the header
  233. lines := strings.Split(stats, "\n")
  234. for len(lines) > 0 && strings.TrimSpace(lines[0]) != "Compactions" {
  235. lines = lines[1:]
  236. }
  237. if len(lines) <= 3 {
  238. db.log.Error("Compaction leveldbTable not found")
  239. merr = errors.New("compaction leveldbTable not found")
  240. continue
  241. }
  242. lines = lines[3:]
  243. // Iterate over all the leveldbTable rows, and accumulate the entries
  244. for j := 0; j < len(compactions[i%2]); j++ {
  245. compactions[i%2][j] = 0
  246. }
  247. for _, line := range lines {
  248. parts := strings.Split(line, "|")
  249. if len(parts) != 6 {
  250. break
  251. }
  252. for idx, counter := range parts[2:] {
  253. value, err := strconv.ParseFloat(strings.TrimSpace(counter), 64)
  254. if err != nil {
  255. db.log.Error("Compaction entry parsing failed", "err", err)
  256. merr = err
  257. continue
  258. }
  259. compactions[i%2][idx] += value
  260. }
  261. }
  262. // Update all the requested meters
  263. if db.diskSizeGauge != nil {
  264. db.diskSizeGauge.Update(int64(compactions[i%2][0] * 1024 * 1024))
  265. }
  266. if db.compTimeMeter != nil {
  267. db.compTimeMeter.Mark(int64((compactions[i%2][1] - compactions[(i-1)%2][1]) * 1000 * 1000 * 1000))
  268. }
  269. if db.compReadMeter != nil {
  270. db.compReadMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
  271. }
  272. if db.compWriteMeter != nil {
  273. db.compWriteMeter.Mark(int64((compactions[i%2][3] - compactions[(i-1)%2][3]) * 1024 * 1024))
  274. }
  275. // Retrieve the write delay statistic
  276. writedelay, err := db.db.GetProperty("leveldb.writedelay")
  277. if err != nil {
  278. db.log.Error("Failed to read database write delay statistic", "err", err)
  279. merr = err
  280. continue
  281. }
  282. var (
  283. delayN int64
  284. delayDuration string
  285. duration time.Duration
  286. paused bool
  287. )
  288. if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s Paused:%t", &delayN, &delayDuration, &paused); n != 3 || err != nil {
  289. db.log.Error("Write delay statistic not found")
  290. merr = err
  291. continue
  292. }
  293. duration, err = time.ParseDuration(delayDuration)
  294. if err != nil {
  295. db.log.Error("Failed to parse delay duration", "err", err)
  296. merr = err
  297. continue
  298. }
  299. if db.writeDelayNMeter != nil {
  300. db.writeDelayNMeter.Mark(delayN - delaystats[0])
  301. }
  302. if db.writeDelayMeter != nil {
  303. db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
  304. }
  305. // If a warning that db is performing compaction has been displayed, any subsequent
  306. // warnings will be withheld for one minute not to overwhelm the user.
  307. if paused && delayN-delaystats[0] == 0 && duration.Nanoseconds()-delaystats[1] == 0 &&
  308. time.Now().After(lastWritePaused.Add(degradationWarnInterval)) {
  309. db.log.Warn("Database compacting, degraded performance")
  310. lastWritePaused = time.Now()
  311. }
  312. delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
  313. // Retrieve the database iostats.
  314. ioStats, err := db.db.GetProperty("leveldb.iostats")
  315. if err != nil {
  316. db.log.Error("Failed to read database iostats", "err", err)
  317. merr = err
  318. continue
  319. }
  320. var nRead, nWrite float64
  321. parts := strings.Split(ioStats, " ")
  322. if len(parts) < 2 {
  323. db.log.Error("Bad syntax of ioStats", "ioStats", ioStats)
  324. merr = fmt.Errorf("bad syntax of ioStats %s", ioStats)
  325. continue
  326. }
  327. if n, err := fmt.Sscanf(parts[0], "Read(MB):%f", &nRead); n != 1 || err != nil {
  328. db.log.Error("Bad syntax of read entry", "entry", parts[0])
  329. merr = err
  330. continue
  331. }
  332. if n, err := fmt.Sscanf(parts[1], "Write(MB):%f", &nWrite); n != 1 || err != nil {
  333. db.log.Error("Bad syntax of write entry", "entry", parts[1])
  334. merr = err
  335. continue
  336. }
  337. if db.diskReadMeter != nil {
  338. db.diskReadMeter.Mark(int64((nRead - iostats[0]) * 1024 * 1024))
  339. }
  340. if db.diskWriteMeter != nil {
  341. db.diskWriteMeter.Mark(int64((nWrite - iostats[1]) * 1024 * 1024))
  342. }
  343. iostats[0], iostats[1] = nRead, nWrite
  344. compCount, err := db.db.GetProperty("leveldb.compcount")
  345. if err != nil {
  346. db.log.Error("Failed to read database iostats", "err", err)
  347. merr = err
  348. continue
  349. }
  350. var (
  351. memComp uint32
  352. level0Comp uint32
  353. nonLevel0Comp uint32
  354. seekComp uint32
  355. )
  356. if n, err := fmt.Sscanf(compCount, "MemComp:%d Level0Comp:%d NonLevel0Comp:%d SeekComp:%d", &memComp, &level0Comp, &nonLevel0Comp, &seekComp); n != 4 || err != nil {
  357. db.log.Error("Compaction count statistic not found")
  358. merr = err
  359. continue
  360. }
  361. db.memCompGauge.Update(int64(memComp))
  362. db.level0CompGauge.Update(int64(level0Comp))
  363. db.nonlevel0CompGauge.Update(int64(nonLevel0Comp))
  364. db.seekCompGauge.Update(int64(seekComp))
  365. // Sleep a bit, then repeat the stats collection
  366. select {
  367. case errc = <-db.quitChan:
  368. // Quit requesting, stop hammering the database
  369. case <-timer.C:
  370. timer.Reset(refresh)
  371. // Timeout, gather a new set of stats
  372. }
  373. }
  374. if errc == nil {
  375. errc = <-db.quitChan
  376. }
  377. errc <- merr
  378. }
  379. // batch is a write-only leveldb batch that commits changes to its host database
  380. // when Write is called. A batch cannot be used concurrently.
  381. type batch struct {
  382. db *leveldb.DB
  383. b *leveldb.Batch
  384. size int
  385. }
  386. // Put inserts the given value into the batch for later committing.
  387. func (b *batch) Put(key, value []byte) error {
  388. b.b.Put(key, value)
  389. b.size += len(value)
  390. return nil
  391. }
  392. // Delete inserts the a key removal into the batch for later committing.
  393. func (b *batch) Delete(key []byte) error {
  394. b.b.Delete(key)
  395. b.size++
  396. return nil
  397. }
  398. // ValueSize retrieves the amount of data queued up for writing.
  399. func (b *batch) ValueSize() int {
  400. return b.size
  401. }
  402. // Write flushes any accumulated data to disk.
  403. func (b *batch) Write() error {
  404. return b.db.Write(b.b, nil)
  405. }
  406. // Reset resets the batch for reuse.
  407. func (b *batch) Reset() {
  408. b.b.Reset()
  409. b.size = 0
  410. }
  411. // Replay replays the batch contents.
  412. func (b *batch) Replay(w ethdb.KeyValueWriter) error {
  413. return b.b.Replay(&replayer{writer: w})
  414. }
  415. // replayer is a small wrapper to implement the correct replay methods.
  416. type replayer struct {
  417. writer ethdb.KeyValueWriter
  418. failure error
  419. }
  420. // Put inserts the given value into the key-value data store.
  421. func (r *replayer) Put(key, value []byte) {
  422. // If the replay already failed, stop executing ops
  423. if r.failure != nil {
  424. return
  425. }
  426. r.failure = r.writer.Put(key, value)
  427. }
  428. // Delete removes the key from the key-value data store.
  429. func (r *replayer) Delete(key []byte) {
  430. // If the replay already failed, stop executing ops
  431. if r.failure != nil {
  432. return
  433. }
  434. r.failure = r.writer.Delete(key)
  435. }
  436. // bytesPrefixRange returns key range that satisfy
  437. // - the given prefix, and
  438. // - the given seek position
  439. func bytesPrefixRange(prefix, start []byte) *util.Range {
  440. r := util.BytesPrefix(prefix)
  441. r.Start = append(r.Start, start...)
  442. return r
  443. }