Просмотр исходного кода

ethdb: add leveldb write delay statistic (#16499)

gary rong 7 лет назад
Родитель
Сommit
6b2b328cdb
1 измененных файлов с 81 добавлено и 14 удалено
  1. 81 14
      ethdb/database.go

+ 81 - 14
ethdb/database.go

@@ -17,6 +17,7 @@
 package ethdb
 
 import (
+	"fmt"
 	"strconv"
 	"strings"
 	"sync"
@@ -32,17 +33,25 @@ import (
 	"github.com/syndtr/goleveldb/leveldb/util"
 )
 
+const (
+	writeDelayNThreshold       = 200
+	writeDelayThreshold        = 350 * time.Millisecond
+	writeDelayWarningThrottler = 1 * time.Minute
+)
+
 var OpenFileLimit = 64
 
 type LDBDatabase struct {
 	fn string      // filename for reporting
 	db *leveldb.DB // LevelDB instance
 
-	compTimeMeter  metrics.Meter // Meter for measuring the total time spent in database compaction
-	compReadMeter  metrics.Meter // Meter for measuring the data read during compaction
-	compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
-	diskReadMeter  metrics.Meter // Meter for measuring the effective amount of data read
-	diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
+	compTimeMeter    metrics.Meter // Meter for measuring the total time spent in database compaction
+	compReadMeter    metrics.Meter // Meter for measuring the data read during compaction
+	compWriteMeter   metrics.Meter // Meter for measuring the data written during compaction
+	writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
+	writeDelayMeter  metrics.Meter // Meter for measuring the write delay duration due to database compaction
+	diskReadMeter    metrics.Meter // Meter for measuring the effective amount of data read
+	diskWriteMeter   metrics.Meter // Meter for measuring the effective amount of data written
 
 	quitLock sync.Mutex      // Mutex protecting the quit channel access
 	quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
@@ -147,16 +156,17 @@ func (db *LDBDatabase) LDB() *leveldb.DB {
 
 // Meter configures the database metrics collectors and
 func (db *LDBDatabase) Meter(prefix string) {
-	// Short circuit metering if the metrics system is disabled
-	if !metrics.Enabled {
-		return
+	if metrics.Enabled {
+		// Initialize all the metrics collector at the requested prefix
+		db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
+		db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
+		db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
+		db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
+		db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
 	}
-	// Initialize all the metrics collector at the requested prefix
-	db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
-	db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
-	db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
-	db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
-	db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
+	// Initialize write delay metrics no matter we are in metric mode or not.
+	db.writeDelayMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/duration", nil)
+	db.writeDelayNMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/counter", nil)
 
 	// Create a quit channel for the periodic collector and run it
 	db.quitLock.Lock()
@@ -178,6 +188,9 @@ func (db *LDBDatabase) Meter(prefix string) {
 //      2   |        523 |    1000.37159 |       7.26059 |      66.86342 |      66.77884
 //      3   |        570 |    1113.18458 |       0.00000 |       0.00000 |       0.00000
 //
+// This is how the write delay look like (currently):
+// DelayN:5 Delay:406.604657ms
+//
 // This is how the iostats look like (currently):
 // Read(MB):3895.04860 Write(MB):3654.64712
 func (db *LDBDatabase) meter(refresh time.Duration) {
@@ -188,6 +201,14 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 	}
 	// Create storage for iostats.
 	var iostats [2]float64
+
+	// Create storage and warning log tracer for write delay.
+	var (
+		delaystats      [2]int64
+		lastWriteDelay  time.Time
+		lastWriteDelayN time.Time
+	)
+
 	// Iterate ad infinitum and collect the stats
 	for i := 1; ; i++ {
 		// Retrieve the database stats
@@ -236,6 +257,52 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 			db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
 		}
 
+		// Retrieve the write delay statistic
+		writedelay, err := db.db.GetProperty("leveldb.writedelay")
+		if err != nil {
+			db.log.Error("Failed to read database write delay statistic", "err", err)
+			return
+		}
+		var (
+			delayN        int64
+			delayDuration string
+			duration      time.Duration
+		)
+		if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
+			db.log.Error("Write delay statistic not found")
+			return
+		}
+		duration, err = time.ParseDuration(delayDuration)
+		if err != nil {
+			db.log.Error("Failed to parse delay duration", "err", err)
+			return
+		}
+		if db.writeDelayNMeter != nil {
+			db.writeDelayNMeter.Mark(delayN - delaystats[0])
+			// If the write delay number been collected in the last minute exceeds the predefined threshold,
+			// print a warning log here.
+			// If a warning that db performance is laggy has been displayed,
+			// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
+			if int(db.writeDelayNMeter.Rate1()) > writeDelayNThreshold &&
+				time.Now().After(lastWriteDelayN.Add(writeDelayWarningThrottler)) {
+				db.log.Warn("Write delay number exceeds the threshold (200 per second) in the last minute")
+				lastWriteDelayN = time.Now()
+			}
+		}
+		if db.writeDelayMeter != nil {
+			db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
+			// If the write delay duration been collected in the last minute exceeds the predefined threshold,
+			// print a warning log here.
+			// If a warning that db performance is laggy has been displayed,
+			// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
+			if int64(db.writeDelayMeter.Rate1()) > writeDelayThreshold.Nanoseconds() &&
+				time.Now().After(lastWriteDelay.Add(writeDelayWarningThrottler)) {
+				db.log.Warn("Write delay duration exceeds the threshold (35% of the time) in the last minute")
+				lastWriteDelay = time.Now()
+			}
+		}
+		delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
+
 		// Retrieve the database iostats.
 		ioStats, err := db.db.GetProperty("leveldb.iostats")
 		if err != nil {