Browse Source

Merge pull request #2218 from karalabe/time-drift-warning

p2p/discover: NTP sanity check clock drift in case of expirations
Péter Szilágyi 9 years ago
parent
commit
1415669ac3
2 changed files with 149 additions and 4 deletions
  1. 127 0
      p2p/discover/ntp.go
  2. 22 4
      p2p/discover/udp.go

+ 127 - 0
p2p/discover/ntp.go

@@ -0,0 +1,127 @@
+// Copyright 2016 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Contains the NTP time drift detection via the SNTP protocol:
+//   https://tools.ietf.org/html/rfc4330
+
+package discover
+
+import (
+	"fmt"
+	"net"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/ethereum/go-ethereum/logger"
+	"github.com/ethereum/go-ethereum/logger/glog"
+)
+
+const (
+	ntpPool   = "pool.ntp.org" // ntpPool is the NTP server to query for the current time
+	ntpChecks = 3              // Number of measurements to do against the NTP server
+)
+
+// durationSlice attaches the methods of sort.Interface to []time.Duration,
+// sorting in increasing order.
+type durationSlice []time.Duration
+
+func (s durationSlice) Len() int           { return len(s) }
+func (s durationSlice) Less(i, j int) bool { return s[i] < s[j] }
+func (s durationSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+
+// checkClockDrift queries an NTP server for clock drifts and warns the user if
+// one large enough is detected.
+func checkClockDrift() {
+	drift, err := sntpDrift(ntpChecks)
+	if err != nil {
+		return
+	}
+	if drift < -driftThreshold || drift > driftThreshold {
+		warning := fmt.Sprintf("System clock seems off by %v, which can prevent network connectivity", drift)
+		howtofix := fmt.Sprintf("Please enable network time synchronisation in system settings")
+		separator := strings.Repeat("-", len(warning))
+
+		glog.V(logger.Warn).Info(separator)
+		glog.V(logger.Warn).Info(warning)
+		glog.V(logger.Warn).Info(howtofix)
+		glog.V(logger.Warn).Info(separator)
+	} else {
+		glog.V(logger.Debug).Infof("Sanity NTP check reported %v drift, all ok", drift)
+	}
+}
+
+// sntpDrift does a naive time resolution against an NTP server and returns the
+// measured drift. This method uses the simple version of NTP. It's not precise
+// but should be fine for these purposes.
+//
+// Note, it executes two extra measurements compared to the number of requested
+// ones to be able to discard the two extremes as outliers.
+func sntpDrift(measurements int) (time.Duration, error) {
+	// Resolve the address of the NTP server
+	addr, err := net.ResolveUDPAddr("udp", ntpPool+":123")
+	if err != nil {
+		return 0, err
+	}
+	// Construct the time request (empty package with only 2 fields set):
+	//   Bits 3-5: Protocol version, 3
+	//   Bits 6-8: Mode of operation, client, 3
+	request := make([]byte, 48)
+	request[0] = 3<<3 | 3
+
+	// Execute each of the measurements
+	drifts := []time.Duration{}
+	for i := 0; i < measurements+2; i++ {
+		// Dial the NTP server and send the time retrieval request
+		conn, err := net.DialUDP("udp", nil, addr)
+		if err != nil {
+			return 0, err
+		}
+		defer conn.Close()
+
+		sent := time.Now()
+		if _, err = conn.Write(request); err != nil {
+			return 0, err
+		}
+		// Retrieve the reply and calculate the elapsed time
+		conn.SetDeadline(time.Now().Add(5 * time.Second))
+
+		reply := make([]byte, 48)
+		if _, err = conn.Read(reply); err != nil {
+			return 0, err
+		}
+		elapsed := time.Since(sent)
+
+		// Reconstruct the time from the reply data
+		sec := uint64(reply[43]) | uint64(reply[42])<<8 | uint64(reply[41])<<16 | uint64(reply[40])<<24
+		frac := uint64(reply[47]) | uint64(reply[46])<<8 | uint64(reply[45])<<16 | uint64(reply[44])<<24
+
+		nanosec := sec*1e9 + (frac*1e9)>>32
+
+		t := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(nanosec)).Local()
+
+		// Calculate the drift based on an assumed answer time of RRT/2
+		drifts = append(drifts, sent.Sub(t)+elapsed/2)
+	}
+	// Calculate average drif (drop two extremities to avoid outliers)
+	sort.Sort(durationSlice(drifts))
+
+	drift := time.Duration(0)
+	for i := 1; i < len(drifts)-1; i++ {
+		drift += drifts[i]
+	}
+	return drift / time.Duration(measurements), nil
+}

+ 22 - 4
p2p/discover/udp.go

@@ -51,6 +51,10 @@ const (
 	respTimeout = 500 * time.Millisecond
 	sendTimeout = 500 * time.Millisecond
 	expiration  = 20 * time.Second
+
+	ntpFailureThreshold = 32               // Continuous timeouts after which to check NTP
+	ntpWarningCooldown  = 10 * time.Minute // Minimum amount of time to pass before repeating NTP warning
+	driftThreshold      = 10 * time.Second // Allowed clock drift before warning user
 )
 
 // RPC packet types
@@ -316,13 +320,15 @@ func (t *udp) handleReply(from NodeID, ptype byte, req packet) bool {
 	}
 }
 
-// loop runs in its own goroutin. it keeps track of
+// loop runs in its own goroutine. it keeps track of
 // the refresh timer and the pending reply queue.
 func (t *udp) loop() {
 	var (
-		plist       = list.New()
-		timeout     = time.NewTimer(0)
-		nextTimeout *pending // head of plist when timeout was last reset
+		plist        = list.New()
+		timeout      = time.NewTimer(0)
+		nextTimeout  *pending // head of plist when timeout was last reset
+		contTimeouts = 0      // number of continuous timeouts to do NTP checks
+		ntpWarnTime  = time.Unix(0, 0)
 	)
 	<-timeout.C // ignore first timeout
 	defer timeout.Stop()
@@ -377,19 +383,31 @@ func (t *udp) loop() {
 						p.errc <- nil
 						plist.Remove(el)
 					}
+					// Reset the continuous timeout counter (time drift detection)
+					contTimeouts = 0
 				}
 			}
 			r.matched <- matched
 
 		case now := <-timeout.C:
 			nextTimeout = nil
+
 			// Notify and remove callbacks whose deadline is in the past.
 			for el := plist.Front(); el != nil; el = el.Next() {
 				p := el.Value.(*pending)
 				if now.After(p.deadline) || now.Equal(p.deadline) {
 					p.errc <- errTimeout
 					plist.Remove(el)
+					contTimeouts++
+				}
+			}
+			// If we've accumulated too many timeouts, do an NTP time sync check
+			if contTimeouts > ntpFailureThreshold {
+				if time.Since(ntpWarnTime) >= ntpWarningCooldown {
+					ntpWarnTime = time.Now()
+					go checkClockDrift()
 				}
+				contTimeouts = 0
 			}
 		}
 	}