dial.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. // Copyright 2015 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package p2p
  17. import (
  18. "container/heap"
  19. "crypto/rand"
  20. "errors"
  21. "fmt"
  22. "net"
  23. "time"
  24. "github.com/ethereum/go-ethereum/logger"
  25. "github.com/ethereum/go-ethereum/logger/glog"
  26. "github.com/ethereum/go-ethereum/p2p/discover"
  27. "github.com/ethereum/go-ethereum/p2p/netutil"
  28. )
  29. const (
  30. // This is the amount of time spent waiting in between
  31. // redialing a certain node.
  32. dialHistoryExpiration = 30 * time.Second
  33. // Discovery lookups are throttled and can only run
  34. // once every few seconds.
  35. lookupInterval = 4 * time.Second
  36. // Endpoint resolution is throttled with bounded backoff.
  37. initialResolveDelay = 60 * time.Second
  38. maxResolveDelay = time.Hour
  39. )
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {
	// maxDynDials is the starting budget newTasks uses when counting
	// how many dynamic dials are still needed.
	maxDynDials int

	// ntab is the discovery table. checkDial tolerates a nil table.
	ntab discoverTable

	// netrestrict, when non-nil, limits dial candidates to IPs the
	// list contains.
	netrestrict *netutil.Netlist

	// lookupRunning is set when newTasks emits a discoverTask and
	// cleared when taskDone receives it back.
	lookupRunning bool

	// dialing maps the IDs of nodes with a pending dial task to the
	// flags of that task.
	dialing map[discover.NodeID]connFlag

	// lookupBuf holds discovery lookup results that have not been
	// tried yet.
	lookupBuf []*discover.Node

	// randomNodes is the buffer handed to ntab.ReadRandomNodes.
	randomNodes []*discover.Node

	// static holds the dial tasks of static nodes, keyed by node ID.
	static map[discover.NodeID]*dialTask

	// hist records recently dialed nodes.
	hist *dialHistory
}
// discoverTable is the subset of discovery table operations used by
// the dialer.
type discoverTable interface {
	// Self returns a node; checkDial compares its ID against dial
	// candidates to avoid dialing ourselves.
	Self() *discover.Node
	Close()
	// Resolve looks up the node with the given ID. The dialer treats
	// a nil result as "not found".
	Resolve(target discover.NodeID) *discover.Node
	// Lookup returns nodes for the given target; discoverTask.Do
	// calls it with a random target.
	Lookup(target discover.NodeID) []*discover.Node
	// ReadRandomNodes fills the given slice and returns a count;
	// newTasks reads at most that many entries from the slice.
	ReadRandomNodes([]*discover.Node) int
}
// dialHistory remembers recent dials. It implements heap.Interface,
// ordered by expiration time (earliest first).
type dialHistory []pastDial
// pastDial is an entry in the dial history.
type pastDial struct {
	id  discover.NodeID // node that was dialed
	exp time.Time       // time after which the entry may be expired
}
// task is a unit of work produced by dialstate.newTasks. Do receives
// the Server the task operates on.
type task interface {
	Do(*Server)
}
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
type dialTask struct {
	// flags is the connection flag set passed on to setupConn.
	flags connFlag

	// dest is the node to dial; resolve replaces it when discovery
	// returns a node for the same ID.
	dest *discover.Node

	// lastResolved is the time of the most recent Resolve call.
	lastResolved time.Time

	// resolveDelay is the minimum interval between Resolve calls. It
	// doubles on failure, capped at maxResolveDelay.
	resolveDelay time.Duration
}
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	// results is filled by Do with the lookup output and later
	// appended to dialstate.lookupBuf by taskDone.
	results []*discover.Node
}
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking.
type waitExpireTask struct {
	// The embedded Duration is how long Do sleeps.
	time.Duration
}
  90. func newDialState(static []*discover.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
  91. s := &dialstate{
  92. maxDynDials: maxdyn,
  93. ntab: ntab,
  94. netrestrict: netrestrict,
  95. static: make(map[discover.NodeID]*dialTask),
  96. dialing: make(map[discover.NodeID]connFlag),
  97. randomNodes: make([]*discover.Node, maxdyn/2),
  98. hist: new(dialHistory),
  99. }
  100. for _, n := range static {
  101. s.addStatic(n)
  102. }
  103. return s
  104. }
  105. func (s *dialstate) addStatic(n *discover.Node) {
  106. // This overwites the task instead of updating an existing
  107. // entry, giving users the opportunity to force a resolve operation.
  108. s.static[n.ID] = &dialTask{flags: staticDialedConn, dest: n}
  109. }
  110. func (s *dialstate) removeStatic(n *discover.Node) {
  111. // This removes a task so future attempts to connect will not be made.
  112. delete(s.static, n.ID)
  113. }
  114. func (s *dialstate) newTasks(nRunning int, peers map[discover.NodeID]*Peer, now time.Time) []task {
  115. var newtasks []task
  116. addDial := func(flag connFlag, n *discover.Node) bool {
  117. if err := s.checkDial(n, peers); err != nil {
  118. glog.V(logger.Debug).Infof("skipping dial candidate %x@%v:%d: %v", n.ID[:8], n.IP, n.TCP, err)
  119. return false
  120. }
  121. s.dialing[n.ID] = flag
  122. newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
  123. return true
  124. }
  125. // Compute number of dynamic dials necessary at this point.
  126. needDynDials := s.maxDynDials
  127. for _, p := range peers {
  128. if p.rw.is(dynDialedConn) {
  129. needDynDials--
  130. }
  131. }
  132. for _, flag := range s.dialing {
  133. if flag&dynDialedConn != 0 {
  134. needDynDials--
  135. }
  136. }
  137. // Expire the dial history on every invocation.
  138. s.hist.expire(now)
  139. // Create dials for static nodes if they are not connected.
  140. for id, t := range s.static {
  141. err := s.checkDial(t.dest, peers)
  142. switch err {
  143. case errNotWhitelisted, errSelf:
  144. glog.V(logger.Debug).Infof("removing static dial candidate %x@%v:%d: %v", t.dest.ID[:8], t.dest.IP, t.dest.TCP, err)
  145. delete(s.static, t.dest.ID)
  146. case nil:
  147. s.dialing[id] = t.flags
  148. newtasks = append(newtasks, t)
  149. }
  150. }
  151. // Use random nodes from the table for half of the necessary
  152. // dynamic dials.
  153. randomCandidates := needDynDials / 2
  154. if randomCandidates > 0 {
  155. n := s.ntab.ReadRandomNodes(s.randomNodes)
  156. for i := 0; i < randomCandidates && i < n; i++ {
  157. if addDial(dynDialedConn, s.randomNodes[i]) {
  158. needDynDials--
  159. }
  160. }
  161. }
  162. // Create dynamic dials from random lookup results, removing tried
  163. // items from the result buffer.
  164. i := 0
  165. for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
  166. if addDial(dynDialedConn, s.lookupBuf[i]) {
  167. needDynDials--
  168. }
  169. }
  170. s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
  171. // Launch a discovery lookup if more candidates are needed.
  172. if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
  173. s.lookupRunning = true
  174. newtasks = append(newtasks, &discoverTask{})
  175. }
  176. // Launch a timer to wait for the next node to expire if all
  177. // candidates have been tried and no task is currently active.
  178. // This should prevent cases where the dialer logic is not ticked
  179. // because there are no pending events.
  180. if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
  181. t := &waitExpireTask{s.hist.min().exp.Sub(now)}
  182. newtasks = append(newtasks, t)
  183. }
  184. return newtasks
  185. }
  186. var (
  187. errSelf = errors.New("is self")
  188. errAlreadyDialing = errors.New("already dialing")
  189. errAlreadyConnected = errors.New("already connected")
  190. errRecentlyDialed = errors.New("recently dialed")
  191. errNotWhitelisted = errors.New("not contained in netrestrict whitelist")
  192. )
  193. func (s *dialstate) checkDial(n *discover.Node, peers map[discover.NodeID]*Peer) error {
  194. _, dialing := s.dialing[n.ID]
  195. switch {
  196. case dialing:
  197. return errAlreadyDialing
  198. case peers[n.ID] != nil:
  199. return errAlreadyConnected
  200. case s.ntab != nil && n.ID == s.ntab.Self().ID:
  201. return errSelf
  202. case s.netrestrict != nil && !s.netrestrict.Contains(n.IP):
  203. return errNotWhitelisted
  204. case s.hist.contains(n.ID):
  205. return errRecentlyDialed
  206. }
  207. return nil
  208. }
  209. func (s *dialstate) taskDone(t task, now time.Time) {
  210. switch t := t.(type) {
  211. case *dialTask:
  212. s.hist.add(t.dest.ID, now.Add(dialHistoryExpiration))
  213. delete(s.dialing, t.dest.ID)
  214. case *discoverTask:
  215. s.lookupRunning = false
  216. s.lookupBuf = append(s.lookupBuf, t.results...)
  217. }
  218. }
  219. func (t *dialTask) Do(srv *Server) {
  220. if t.dest.Incomplete() {
  221. if !t.resolve(srv) {
  222. return
  223. }
  224. }
  225. success := t.dial(srv, t.dest)
  226. // Try resolving the ID of static nodes if dialing failed.
  227. if !success && t.flags&staticDialedConn != 0 {
  228. if t.resolve(srv) {
  229. t.dial(srv, t.dest)
  230. }
  231. }
  232. }
  233. // resolve attempts to find the current endpoint for the destination
  234. // using discovery.
  235. //
  236. // Resolve operations are throttled with backoff to avoid flooding the
  237. // discovery network with useless queries for nodes that don't exist.
  238. // The backoff delay resets when the node is found.
  239. func (t *dialTask) resolve(srv *Server) bool {
  240. if srv.ntab == nil {
  241. glog.V(logger.Debug).Infof("can't resolve node %x: discovery is disabled", t.dest.ID[:6])
  242. return false
  243. }
  244. if t.resolveDelay == 0 {
  245. t.resolveDelay = initialResolveDelay
  246. }
  247. if time.Since(t.lastResolved) < t.resolveDelay {
  248. return false
  249. }
  250. resolved := srv.ntab.Resolve(t.dest.ID)
  251. t.lastResolved = time.Now()
  252. if resolved == nil {
  253. t.resolveDelay *= 2
  254. if t.resolveDelay > maxResolveDelay {
  255. t.resolveDelay = maxResolveDelay
  256. }
  257. glog.V(logger.Debug).Infof("resolving node %x failed (new delay: %v)", t.dest.ID[:6], t.resolveDelay)
  258. return false
  259. }
  260. // The node was found.
  261. t.resolveDelay = initialResolveDelay
  262. t.dest = resolved
  263. glog.V(logger.Debug).Infof("resolved node %x: %v:%d", t.dest.ID[:6], t.dest.IP, t.dest.TCP)
  264. return true
  265. }
  266. // dial performs the actual connection attempt.
  267. func (t *dialTask) dial(srv *Server, dest *discover.Node) bool {
  268. addr := &net.TCPAddr{IP: dest.IP, Port: int(dest.TCP)}
  269. glog.V(logger.Debug).Infof("dial tcp %v (%x)\n", addr, dest.ID[:6])
  270. fd, err := srv.Dialer.Dial("tcp", addr.String())
  271. if err != nil {
  272. glog.V(logger.Detail).Infof("%v", err)
  273. return false
  274. }
  275. mfd := newMeteredConn(fd, false)
  276. srv.setupConn(mfd, t.flags, dest)
  277. return true
  278. }
  279. func (t *dialTask) String() string {
  280. return fmt.Sprintf("%v %x %v:%d", t.flags, t.dest.ID[:8], t.dest.IP, t.dest.TCP)
  281. }
  282. func (t *discoverTask) Do(srv *Server) {
  283. // newTasks generates a lookup task whenever dynamic dials are
  284. // necessary. Lookups need to take some time, otherwise the
  285. // event loop spins too fast.
  286. next := srv.lastLookup.Add(lookupInterval)
  287. if now := time.Now(); now.Before(next) {
  288. time.Sleep(next.Sub(now))
  289. }
  290. srv.lastLookup = time.Now()
  291. var target discover.NodeID
  292. rand.Read(target[:])
  293. t.results = srv.ntab.Lookup(target)
  294. }
  295. func (t *discoverTask) String() string {
  296. s := "discovery lookup"
  297. if len(t.results) > 0 {
  298. s += fmt.Sprintf(" (%d results)", len(t.results))
  299. }
  300. return s
  301. }
  302. func (t waitExpireTask) Do(*Server) {
  303. time.Sleep(t.Duration)
  304. }
  305. func (t waitExpireTask) String() string {
  306. return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
  307. }
  308. // Use only these methods to access or modify dialHistory.
  309. func (h dialHistory) min() pastDial {
  310. return h[0]
  311. }
  312. func (h *dialHistory) add(id discover.NodeID, exp time.Time) {
  313. heap.Push(h, pastDial{id, exp})
  314. }
  315. func (h dialHistory) contains(id discover.NodeID) bool {
  316. for _, v := range h {
  317. if v.id == id {
  318. return true
  319. }
  320. }
  321. return false
  322. }
  323. func (h *dialHistory) expire(now time.Time) {
  324. for h.Len() > 0 && h.min().exp.Before(now) {
  325. heap.Pop(h)
  326. }
  327. }
  328. // heap.Interface boilerplate
  329. func (h dialHistory) Len() int { return len(h) }
  330. func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
  331. func (h dialHistory) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
  332. func (h *dialHistory) Push(x interface{}) {
  333. *h = append(*h, x.(pastDial))
  334. }
  335. func (h *dialHistory) Pop() interface{} {
  336. old := *h
  337. n := len(old)
  338. x := old[n-1]
  339. *h = old[0 : n-1]
  340. return x
  341. }