dial.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. // Copyright 2015 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package p2p
  17. import (
  18. "errors"
  19. "fmt"
  20. "net"
  21. "time"
  22. "github.com/ethereum/go-ethereum/log"
  23. "github.com/ethereum/go-ethereum/p2p/enode"
  24. "github.com/ethereum/go-ethereum/p2p/netutil"
  25. )
  26. const (
  27. // This is the amount of time spent waiting in between redialing a certain node. The
  28. // limit is a bit higher than inboundThrottleTime to prevent failing dials in small
  29. // private networks.
  30. dialHistoryExpiration = inboundThrottleTime + 5*time.Second
  31. // If no peers are found for this amount of time, the initial bootnodes are dialed.
  32. fallbackInterval = 20 * time.Second
  33. // Endpoint resolution is throttled with bounded backoff.
  34. initialResolveDelay = 60 * time.Second
  35. maxResolveDelay = time.Hour
  36. )
  // NodeDialer is used to connect to nodes in the network, typically by using
  // an underlying net.Dialer but also using net.Pipe in tests.
  type NodeDialer interface {
  	// Dial opens a connection to the given node and returns it, or an
  	// error if the connection could not be established.
  	Dial(*enode.Node) (net.Conn, error)
  }
  // nodeResolver looks up the current record of a node. dialTask.resolve
  // treats a nil result as a failed lookup.
  type nodeResolver interface {
  	Resolve(*enode.Node) *enode.Node
  }
  // TCPDialer implements the NodeDialer interface by using a net.Dialer to
  // create TCP connections to nodes in the network. The *net.Dialer is
  // embedded, so its settings apply to every connection attempt.
  type TCPDialer struct {
  	*net.Dialer
  }
  50. // Dial creates a TCP connection to the node
  51. func (t TCPDialer) Dial(dest *enode.Node) (net.Conn, error) {
  52. addr := &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}
  53. return t.Dialer.Dial("tcp", addr.String())
  54. }
  // dialstate schedules dials and discovery lookups.
  // It gets a chance to compute new tasks on every iteration
  // of the main loop in Server.run.
  type dialstate struct {
  	maxDynDials int              // budget of dynamic dials; newTasks subtracts connected and in-flight dynamic dials from it
  	netrestrict *netutil.Netlist // if non-nil, candidates whose IP is not contained are rejected
  	self        enode.ID         // candidates with this ID are rejected with errSelf
  	bootnodes   []*enode.Node    // default dials when there are no peers
  	log         log.Logger       // falls back to log.Root() when the config has no logger

  	start         time.Time // time when the dialer was first used
  	lookupRunning bool      // true while a discoverTask is outstanding
  	dialing       map[enode.ID]connFlag // flags of dials currently in progress, keyed by node ID
  	lookupBuf     []*enode.Node // current discovery lookup results
  	static        map[enode.ID]*dialTask // one reusable dial task per static node
  	hist          expHeap // recently dialed nodes; entries are expired at the start of newTasks
  }
  // task is a unit of work produced by dialstate.newTasks. The task types
  // in this file are dialTask, discoverTask and waitExpireTask.
  type task interface {
  	// Do performs the task using the given server.
  	Do(*Server)
  }
  74. func newDialState(self enode.ID, maxdyn int, cfg *Config) *dialstate {
  75. s := &dialstate{
  76. maxDynDials: maxdyn,
  77. self: self,
  78. netrestrict: cfg.NetRestrict,
  79. log: cfg.Logger,
  80. static: make(map[enode.ID]*dialTask),
  81. dialing: make(map[enode.ID]connFlag),
  82. bootnodes: make([]*enode.Node, len(cfg.BootstrapNodes)),
  83. }
  84. copy(s.bootnodes, cfg.BootstrapNodes)
  85. if s.log == nil {
  86. s.log = log.Root()
  87. }
  88. for _, n := range cfg.StaticNodes {
  89. s.addStatic(n)
  90. }
  91. return s
  92. }
  93. func (s *dialstate) addStatic(n *enode.Node) {
  94. // This overwrites the task instead of updating an existing
  95. // entry, giving users the opportunity to force a resolve operation.
  96. s.static[n.ID()] = &dialTask{flags: staticDialedConn, dest: n}
  97. }
  98. func (s *dialstate) removeStatic(n *enode.Node) {
  99. // This removes a task so future attempts to connect will not be made.
  100. delete(s.static, n.ID())
  101. }
  102. func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Time) []task {
  103. var newtasks []task
  104. addDial := func(flag connFlag, n *enode.Node) bool {
  105. if err := s.checkDial(n, peers); err != nil {
  106. s.log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
  107. return false
  108. }
  109. s.dialing[n.ID()] = flag
  110. newtasks = append(newtasks, &dialTask{flags: flag, dest: n})
  111. return true
  112. }
  113. if s.start.IsZero() {
  114. s.start = now
  115. }
  116. s.hist.expire(now)
  117. // Create dials for static nodes if they are not connected.
  118. for id, t := range s.static {
  119. err := s.checkDial(t.dest, peers)
  120. switch err {
  121. case errNotWhitelisted, errSelf:
  122. s.log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
  123. delete(s.static, t.dest.ID())
  124. case nil:
  125. s.dialing[id] = t.flags
  126. newtasks = append(newtasks, t)
  127. }
  128. }
  129. // Compute number of dynamic dials needed.
  130. needDynDials := s.maxDynDials
  131. for _, p := range peers {
  132. if p.rw.is(dynDialedConn) {
  133. needDynDials--
  134. }
  135. }
  136. for _, flag := range s.dialing {
  137. if flag&dynDialedConn != 0 {
  138. needDynDials--
  139. }
  140. }
  141. // If we don't have any peers whatsoever, try to dial a random bootnode. This
  142. // scenario is useful for the testnet (and private networks) where the discovery
  143. // table might be full of mostly bad peers, making it hard to find good ones.
  144. if len(peers) == 0 && len(s.bootnodes) > 0 && needDynDials > 0 && now.Sub(s.start) > fallbackInterval {
  145. bootnode := s.bootnodes[0]
  146. s.bootnodes = append(s.bootnodes[:0], s.bootnodes[1:]...)
  147. s.bootnodes = append(s.bootnodes, bootnode)
  148. if addDial(dynDialedConn, bootnode) {
  149. needDynDials--
  150. }
  151. }
  152. // Create dynamic dials from discovery results.
  153. i := 0
  154. for ; i < len(s.lookupBuf) && needDynDials > 0; i++ {
  155. if addDial(dynDialedConn, s.lookupBuf[i]) {
  156. needDynDials--
  157. }
  158. }
  159. s.lookupBuf = s.lookupBuf[:copy(s.lookupBuf, s.lookupBuf[i:])]
  160. // Launch a discovery lookup if more candidates are needed.
  161. if len(s.lookupBuf) < needDynDials && !s.lookupRunning {
  162. s.lookupRunning = true
  163. newtasks = append(newtasks, &discoverTask{want: needDynDials - len(s.lookupBuf)})
  164. }
  165. // Launch a timer to wait for the next node to expire if all
  166. // candidates have been tried and no task is currently active.
  167. // This should prevent cases where the dialer logic is not ticked
  168. // because there are no pending events.
  169. if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
  170. t := &waitExpireTask{s.hist.nextExpiry().Sub(now)}
  171. newtasks = append(newtasks, t)
  172. }
  173. return newtasks
  174. }
  // Reasons for which checkDial rejects a dial candidate.
  var (
  	// errAlreadyDialing: a dial to the node is already in progress.
  	errAlreadyDialing = errors.New("already dialing")
  	// errAlreadyConnected: the node is already among the connected peers.
  	errAlreadyConnected = errors.New("already connected")
  	// errSelf: the candidate is the local node itself.
  	errSelf = errors.New("is self")
  	// errNotWhitelisted: the candidate's IP is outside the netrestrict list.
  	errNotWhitelisted = errors.New("not contained in netrestrict whitelist")
  	// errRecentlyDialed: the candidate is still present in the dial history.
  	errRecentlyDialed = errors.New("recently dialed")
  )
  182. func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
  183. _, dialing := s.dialing[n.ID()]
  184. switch {
  185. case dialing:
  186. return errAlreadyDialing
  187. case peers[n.ID()] != nil:
  188. return errAlreadyConnected
  189. case n.ID() == s.self:
  190. return errSelf
  191. case s.netrestrict != nil && !s.netrestrict.Contains(n.IP()):
  192. return errNotWhitelisted
  193. case s.hist.contains(string(n.ID().Bytes())):
  194. return errRecentlyDialed
  195. }
  196. return nil
  197. }
  198. func (s *dialstate) taskDone(t task, now time.Time) {
  199. switch t := t.(type) {
  200. case *dialTask:
  201. s.hist.add(string(t.dest.ID().Bytes()), now.Add(dialHistoryExpiration))
  202. delete(s.dialing, t.dest.ID())
  203. case *discoverTask:
  204. s.lookupRunning = false
  205. s.lookupBuf = append(s.lookupBuf, t.results...)
  206. }
  207. }
  // A dialTask is generated for each node that is dialed. Its
  // fields cannot be accessed while the task is running.
  type dialTask struct {
  	flags        connFlag      // connection flags passed on to Server.SetupConn
  	dest         *enode.Node   // node to dial; replaced by the resolved node when resolve succeeds
  	lastResolved time.Time     // time of the most recent resolver call
  	resolveDelay time.Duration // current backoff between resolver calls
  }
  216. func (t *dialTask) Do(srv *Server) {
  217. if t.dest.Incomplete() {
  218. if !t.resolve(srv) {
  219. return
  220. }
  221. }
  222. err := t.dial(srv, t.dest)
  223. if err != nil {
  224. srv.log.Trace("Dial error", "task", t, "err", err)
  225. // Try resolving the ID of static nodes if dialing failed.
  226. if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
  227. if t.resolve(srv) {
  228. t.dial(srv, t.dest)
  229. }
  230. }
  231. }
  232. }
  233. // resolve attempts to find the current endpoint for the destination
  234. // using discovery.
  235. //
  236. // Resolve operations are throttled with backoff to avoid flooding the
  237. // discovery network with useless queries for nodes that don't exist.
  238. // The backoff delay resets when the node is found.
  239. func (t *dialTask) resolve(srv *Server) bool {
  240. if srv.staticNodeResolver == nil {
  241. srv.log.Debug("Can't resolve node", "id", t.dest.ID(), "err", "discovery is disabled")
  242. return false
  243. }
  244. if t.resolveDelay == 0 {
  245. t.resolveDelay = initialResolveDelay
  246. }
  247. if time.Since(t.lastResolved) < t.resolveDelay {
  248. return false
  249. }
  250. resolved := srv.staticNodeResolver.Resolve(t.dest)
  251. t.lastResolved = time.Now()
  252. if resolved == nil {
  253. t.resolveDelay *= 2
  254. if t.resolveDelay > maxResolveDelay {
  255. t.resolveDelay = maxResolveDelay
  256. }
  257. srv.log.Debug("Resolving node failed", "id", t.dest.ID(), "newdelay", t.resolveDelay)
  258. return false
  259. }
  260. // The node was found.
  261. t.resolveDelay = initialResolveDelay
  262. t.dest = resolved
  263. srv.log.Debug("Resolved node", "id", t.dest.ID(), "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
  264. return true
  265. }
  // dialError wraps an error returned by the server's Dialer. dialTask.Do
  // uses the type to tell a failed connection attempt apart from an error
  // returned by Server.SetupConn.
  type dialError struct {
  	error
  }
  269. // dial performs the actual connection attempt.
  270. func (t *dialTask) dial(srv *Server, dest *enode.Node) error {
  271. fd, err := srv.Dialer.Dial(dest)
  272. if err != nil {
  273. return &dialError{err}
  274. }
  275. mfd := newMeteredConn(fd, false, dest.IP())
  276. return srv.SetupConn(mfd, t.flags, dest)
  277. }
  278. func (t *dialTask) String() string {
  279. id := t.dest.ID()
  280. return fmt.Sprintf("%v %x %v:%d", t.flags, id[:8], t.dest.IP(), t.dest.TCP())
  281. }
  // discoverTask runs discovery table operations.
  // Only one discoverTask is active at any time.
  // discoverTask.Do performs a random lookup.
  type discoverTask struct {
  	want    int           // number of nodes requested from the discovery source
  	results []*enode.Node // nodes found by Do; consumed by dialstate.taskDone
  }
  289. func (t *discoverTask) Do(srv *Server) {
  290. t.results = enode.ReadNodes(srv.discmix, t.want)
  291. }
  292. func (t *discoverTask) String() string {
  293. s := "discovery query"
  294. if len(t.results) > 0 {
  295. s += fmt.Sprintf(" (%d results)", len(t.results))
  296. } else {
  297. s += fmt.Sprintf(" (want %d)", t.want)
  298. }
  299. return s
  300. }
  // A waitExpireTask is generated if there are no other tasks
  // to keep the loop in Server.run ticking. The embedded duration
  // is the time until the next dial history entry expires.
  type waitExpireTask struct {
  	time.Duration
  }
  306. func (t waitExpireTask) Do(*Server) {
  307. time.Sleep(t.Duration)
  308. }
  309. func (t waitExpireTask) String() string {
  310. return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
  311. }