dial.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. // Copyright 2015 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package p2p
  17. import (
  18. "errors"
  19. "fmt"
  20. "net"
  21. "time"
  22. "github.com/ethereum/go-ethereum/log"
  23. "github.com/ethereum/go-ethereum/p2p/enode"
  24. "github.com/ethereum/go-ethereum/p2p/netutil"
  25. )
const (
	// dialHistoryExpiration is the amount of time spent waiting in between
	// redialing a certain node. The limit is a bit higher than
	// inboundThrottleTime to prevent failing dials in small private networks.
	dialHistoryExpiration = inboundThrottleTime + 5*time.Second

	// lookupInterval is the minimum spacing between discovery lookups.
	// Lookups are throttled and can only run once every few seconds.
	lookupInterval = 4 * time.Second

	// fallbackInterval is how long the dialer must have been running before
	// it starts dialing the initial bootnodes when there are no peers at all.
	fallbackInterval = 20 * time.Second

	// Endpoint resolution is throttled with bounded backoff: the delay starts
	// at initialResolveDelay, doubles after every failed resolve and is capped
	// at maxResolveDelay.
	initialResolveDelay = 60 * time.Second
	maxResolveDelay     = time.Hour
)
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
	// Dial opens a connection to the given node. It returns the established
	// connection, or an error if the connection attempt failed.
	Dial(*enode.Node) (net.Conn, error)
}
// TCPDialer implements the NodeDialer interface by using a net.Dialer to
// create TCP connections to nodes in the network.
type TCPDialer struct {
	// The embedded net.Dialer performs the actual connection attempt; its
	// settings apply to every dial made through TCPDialer.
	*net.Dialer
}
  51. // Dial creates a TCP connection to the node
  52. func (t TCPDialer) Dial(dest *enode.Node) (net.Conn, error) {
  53. addr := &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}
  54. return t.Dialer.Dial("tcp", addr.String())
  55. }
// dialstate schedules dials and discovery lookups.
// It gets a chance to compute new tasks on every iteration
// of the main loop in Server.run.
type dialstate struct {
	// maxDynDials is the starting budget for dynamic dials in newTasks; it is
	// reduced by dynamically dialed peers and by dynamic dials in progress.
	maxDynDials int

	// ntab is the discovery table that random dial candidates are read from.
	ntab discoverTable

	// netrestrict, if non-nil, restricts dialing to nodes whose IP is
	// contained in the list.
	netrestrict *netutil.Netlist

	// self is the local node ID; it is never dialed.
	self enode.ID

	// bootnodes are the default dials when there are no peers. The list is
	// rotated each time a bootnode is tried.
	bootnodes []*enode.Node

	// log receives dial scheduling messages.
	log log.Logger

	// start is the time when the dialer was first used.
	start time.Time

	// lookupRunning is true between scheduling a discoverTask and seeing it
	// complete in taskDone, so that only one lookup runs at a time.
	lookupRunning bool

	// dialing tracks nodes with a dial task in progress, keyed by node ID,
	// together with the flags of that dial.
	dialing map[enode.ID]connFlag

	// lookupBuf holds the current discovery lookup results that have not
	// been tried yet.
	lookupBuf []*enode.Node

	// randomNodes is a scratch buffer filled from the table via
	// ReadRandomNodes.
	randomNodes []*enode.Node

	// static holds the dial tasks of static nodes, keyed by node ID.
	static map[enode.ID]*dialTask

	// hist records recently dialed nodes; entries expire after
	// dialHistoryExpiration and block redialing until then.
	hist expHeap
}
// discoverTable is the subset of the discovery table that the dialer relies
// on. The notes on each method describe how this file uses it.
type discoverTable interface {
	// Close is not called from this file.
	// NOTE(review): presumably shuts the table down; confirm against the implementation.
	Close()

	// Resolve is used by dialTask.resolve to look up the current record of a
	// node. A nil result is treated as "node not found".
	Resolve(*enode.Node) *enode.Node

	// LookupRandom is called by discoverTask.Do; its results are used as
	// dynamic dial candidates.
	LookupRandom() []*enode.Node

	// ReadRandomNodes is given a buffer to fill; newTasks treats the return
	// value as the number of usable entries at the start of the buffer.
	ReadRandomNodes([]*enode.Node) int
}
// task is a unit of work produced by dialstate.newTasks. Once a task has
// finished it is reported back through dialstate.taskDone.
type task interface {
	// Do performs the task's work using the given server.
	Do(*Server)
}
// A dialTask is generated for each node that is dialed. Its
// fields cannot be accessed while the task is running.
type dialTask struct {
	// flags are the connection flags passed to SetupConn. Tasks carrying
	// staticDialedConn attempt a resolve when dialing fails.
	flags connFlag

	// dest is the node to dial. It is replaced when resolve finds a newer
	// record for the node.
	dest *enode.Node

	// lastResolved is the time of the most recent Resolve call for dest.
	lastResolved time.Time

	// resolveDelay is the minimum time between two resolve attempts. It
	// doubles on failure (capped at maxResolveDelay) and resets on success.
	resolveDelay time.Duration
}
// discoverTask runs discovery table operations.
// Only one discoverTask is active at any time.
// discoverTask.Do performs a random lookup.
type discoverTask struct {
	// results holds the nodes returned by the lookup. It is filled by Do and
	// appended to dialstate.lookupBuf in taskDone.
	results []*enode.Node
}
// A waitExpireTask is generated if there are no other tasks
// to keep the loop in Server.run ticking.
type waitExpireTask struct {
	// The embedded Duration is how long Do sleeps. newTasks sets it to the
	// time remaining until the next dial history entry expires.
	time.Duration
}
  102. func newDialState(self enode.ID, ntab discoverTable, maxdyn int, cfg *Config) *dialstate {
  103. s := &dialstate{
  104. maxDynDials: maxdyn,
  105. ntab: ntab,
  106. self: self,
  107. netrestrict: cfg.NetRestrict,
  108. log: cfg.Logger,
  109. static: make(map[enode.ID]*dialTask),
  110. dialing: make(map[enode.ID]connFlag),
  111. bootnodes: make([]*enode.Node, len(cfg.BootstrapNodes)),
  112. randomNodes: make([]*enode.Node, maxdyn/2),
  113. }
  114. copy(s.bootnodes, cfg.BootstrapNodes)
  115. if s.log == nil {
  116. s.log = log.Root()
  117. }
  118. for _, n := range cfg.StaticNodes {
  119. s.addStatic(n)
  120. }
  121. return s
  122. }
  123. func (s *dialstate) addStatic(n *enode.Node) {
  124. // This overwrites the task instead of updating an existing
  125. // entry, giving users the opportunity to force a resolve operation.
  126. s.static[n.ID()] = &dialTask{flags: staticDialedConn, dest: n}
  127. }
  128. func (s *dialstate) removeStatic(n *enode.Node) {
  129. // This removes a task so future attempts to connect will not be made.
  130. delete(s.static, n.ID())
  131. }
  132. func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Time) []task {
  133. if s.start.IsZero() {
  134. s.start = now
  135. }
  136. var newtasks []task
  137. addDial := func(flag connFlag, n *enode.Node) bool {
  138. if err := s.checkDial(n, peers); err != nil {
  139. s.log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
  140. return false
  141. }
  142. s.dialing[n.ID()] = flag
  143. newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
  144. return true
  145. }
  146. // Compute number of dynamic dials necessary at this point.
  147. needDynDials := s.maxDynDials
  148. for _, p := range peers {
  149. if p.rw.is(dynDialedConn) {
  150. needDynDials--
  151. }
  152. }
  153. for _, flag := range s.dialing {
  154. if flag&dynDialedConn != 0 {
  155. needDynDials--
  156. }
  157. }
  158. // Expire the dial history on every invocation.
  159. s.hist.expire(now)
  160. // Create dials for static nodes if they are not connected.
  161. for id, t := range s.static {
  162. err := s.checkDial(t.dest, peers)
  163. switch err {
  164. case errNotWhitelisted, errSelf:
  165. s.log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
  166. delete(s.static, t.dest.ID())
  167. case nil:
  168. s.dialing[id] = t.flags
  169. newtasks = append(newtasks, t)
  170. }
  171. }
  172. // If we don't have any peers whatsoever, try to dial a random bootnode. This
  173. // scenario is useful for the testnet (and private networks) where the discovery
  174. // table might be full of mostly bad peers, making it hard to find good ones.
  175. if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
  176. bootnode := s.bootnodes[0]
  177. s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
  178. s.bootnodes = append(s.bootnodes, bootnode)
  179. if addDial(dynDialedConn, bootnode) {
  180. needDynDials--
  181. }
  182. }
  183. // Use random nodes from the table for half of the necessary
  184. // dynamic dials.
  185. randomCandidates := needDynDials / 2
  186. if randomCandidates > 0 {
  187. n := s.ntab.ReadRandomNodes(s.randomNodes)
  188. for i := 0; i < randomCandidates && i < n; i++ {
  189. if addDial(dynDialedConn, s.randomNodes[i]) {
  190. needDynDials--
  191. }
  192. }
  193. }
  194. // Create dynamic dials from random lookup results, removing tried
  195. // items from the result buffer.
  196. i := 0
  197. for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
  198. if addDial(dynDialedConn, s.lookupBuf[i]) {
  199. needDynDials--
  200. }
  201. }
  202. s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
  203. // Launch a discovery lookup if more candidates are needed.
  204. if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
  205. s.lookupRunning = true
  206. newtasks = append(newtasks, &discoverTask{})
  207. }
  208. // Launch a timer to wait for the next node to expire if all
  209. // candidates have been tried and no task is currently active.
  210. // This should prevent cases where the dialer logic is not ticked
  211. // because there are no pending events.
  212. if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
  213. t := &waitExpireTask{s.hist.nextExpiry().Sub(now)}
  214. newtasks = append(newtasks, t)
  215. }
  216. return newtasks
  217. }
// Reasons returned by checkDial for rejecting a dial candidate.
var (
	// errSelf: the candidate's ID equals the local node ID.
	errSelf = errors.New("is self")
	// errAlreadyDialing: a dial to the candidate is already in progress.
	errAlreadyDialing = errors.New("already dialing")
	// errAlreadyConnected: the candidate is already in the peer set.
	errAlreadyConnected = errors.New("already connected")
	// errRecentlyDialed: the candidate is still present in the dial history.
	errRecentlyDialed = errors.New("recently dialed")
	// errNotWhitelisted: the candidate's IP is not in the netrestrict list.
	errNotWhitelisted = errors.New("not contained in netrestrict whitelist")
)
  225. func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
  226. _, dialing := s.dialing[n.ID()]
  227. switch {
  228. case dialing:
  229. return errAlreadyDialing
  230. case peers[n.ID()] != nil:
  231. return errAlreadyConnected
  232. case n.ID() == s.self:
  233. return errSelf
  234. case s.netrestrict != nil && !s.netrestrict.Contains(n.IP()):
  235. return errNotWhitelisted
  236. case s.hist.contains(string(n.ID().Bytes())):
  237. return errRecentlyDialed
  238. }
  239. return nil
  240. }
  241. func (s *dialstate) taskDone(t task, now time.Time) {
  242. switch t := t.(type) {
  243. case *dialTask:
  244. s.hist.add(string(t.dest.ID().Bytes()), now.Add(dialHistoryExpiration))
  245. delete(s.dialing, t.dest.ID())
  246. case *discoverTask:
  247. s.lookupRunning = false
  248. s.lookupBuf = append(s.lookupBuf, t.results...)
  249. }
  250. }
  251. func (t *dialTask) Do(srv *Server) {
  252. if t.dest.Incomplete() {
  253. if !t.resolve(srv) {
  254. return
  255. }
  256. }
  257. err := t.dial(srv, t.dest)
  258. if err != nil {
  259. srv.log.Trace("Dial error", "task", t, "err", err)
  260. // Try resolving the ID of static nodes if dialing failed.
  261. if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
  262. if t.resolve(srv) {
  263. t.dial(srv, t.dest)
  264. }
  265. }
  266. }
  267. }
  268. // resolve attempts to find the current endpoint for the destination
  269. // using discovery.
  270. //
  271. // Resolve operations are throttled with backoff to avoid flooding the
  272. // discovery network with useless queries for nodes that don't exist.
  273. // The backoff delay resets when the node is found.
  274. func (t *dialTask) resolve(srv *Server) bool {
  275. if srv.ntab == nil {
  276. srv.log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
  277. return false
  278. }
  279. if t.resolveDelay == 0 {
  280. t.resolveDelay = initialResolveDelay
  281. }
  282. if time.Since(t.lastResolved) < t.resolveDelay {
  283. return false
  284. }
  285. resolved := srv.ntab.Resolve(t.dest)
  286. t.lastResolved = time.Now()
  287. if resolved == nil {
  288. t.resolveDelay *= 2
  289. if t.resolveDelay > maxResolveDelay {
  290. t.resolveDelay = maxResolveDelay
  291. }
  292. srv.log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
  293. return false
  294. }
  295. // The node was found.
  296. t.resolveDelay = initialResolveDelay
  297. t.dest = resolved
  298. srv.log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
  299. return true
  300. }
// dialError wraps an error returned by the server's Dialer. dialTask.Do uses
// the type to tell failures to connect apart from errors returned by
// SetupConn; only the former trigger a resolve for static nodes.
type dialError struct {
	error
}
  304. // dial performs the actual connection attempt.
  305. func (t *dialTask) dial(srv *Server, dest *enode.Node) error {
  306. fd, err := srv.Dialer.Dial(dest)
  307. if err != nil {
  308. return &dialError{err}
  309. }
  310. mfd := newMeteredConn(fd, false, dest.IP())
  311. return srv.SetupConn(mfd, t.flags, dest)
  312. }
  313. func (t *dialTask) String() string {
  314. id := t.dest.ID()
  315. return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP())
  316. }
  317. func (t *discoverTask) Do(srv *Server) {
  318. // newTasks generates a lookup task whenever dynamic dials are
  319. // necessary. Lookups need to take some time, otherwise the
  320. // event loop spins too fast.
  321. next := srv.lastLookup.Add(lookupInterval)
  322. if now := time.Now(); now.Before(next) {
  323. time.Sleep(next.Sub(now))
  324. }
  325. srv.lastLookup = time.Now()
  326. t.results = srv.ntab.LookupRandom()
  327. }
  328. func (t *discoverTask) String() string {
  329. s := "discovery lookup"
  330. if len(t.results) > 0 {
  331. s += fmt.Sprintf(" (%d results)", len(t.results))
  332. }
  333. return s
  334. }
  335. func (t waitExpireTask) Do(*Server) {
  336. time.Sleep(t.Duration)
  337. }
  338. func (t waitExpireTask) String() string {
  339. return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
  340. }