| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678 |
- // Copyright 2015 The go-ethereum Authors
- // This file is part of the go-ethereum library.
- //
- // The go-ethereum library is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Lesser General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // The go-ethereum library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Lesser General Public License for more details.
- //
- // You should have received a copy of the GNU Lesser General Public License
- // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
- // Package discover implements the Node Discovery Protocol.
- //
- // The Node Discovery protocol provides a way to find RLPx nodes that
- // can be connected to. It uses a Kademlia-like protocol to maintain a
- // distributed database of the IDs and endpoints of all listening
- // nodes.
- package discover
- import (
- "crypto/rand"
- "encoding/binary"
- "fmt"
- "net"
- "sort"
- "sync"
- "time"
- "github.com/ethereum/go-ethereum/common"
- "github.com/ethereum/go-ethereum/crypto"
- "github.com/ethereum/go-ethereum/logger"
- "github.com/ethereum/go-ethereum/logger/glog"
- )
- const (
- alpha = 3 // Kademlia concurrency factor
- bucketSize = 16 // Kademlia bucket size
- hashBits = len(common.Hash{}) * 8
- nBuckets = hashBits + 1 // Number of buckets
- maxBondingPingPongs = 16
- maxFindnodeFailures = 5
- autoRefreshInterval = 1 * time.Hour
- seedCount = 30
- seedMaxAge = 5 * 24 * time.Hour
- )
- type Table struct {
- mutex sync.Mutex // protects buckets, their content, and nursery
- buckets [nBuckets]*bucket // index of known nodes by distance
- nursery []*Node // bootstrap nodes
- db *nodeDB // database of known nodes
- refreshReq chan chan struct{}
- closeReq chan struct{}
- closed chan struct{}
- bondmu sync.Mutex
- bonding map[NodeID]*bondproc
- bondslots chan struct{} // limits total number of active bonding processes
- nodeAddedHook func(*Node) // for testing
- net transport
- self *Node // metadata of the local node
- }
- type bondproc struct {
- err error
- n *Node
- done chan struct{}
- }
- // transport is implemented by the UDP transport.
- // it is an interface so we can test without opening lots of UDP
- // sockets and without generating a private key.
- type transport interface {
- ping(NodeID, *net.UDPAddr) error
- waitping(NodeID) error
- findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
- close()
- }
- // bucket contains nodes, ordered by their last activity. the entry
- // that was most recently active is the first element in entries.
- type bucket struct{ entries []*Node }
- func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) (*Table, error) {
- // If no node database was given, use an in-memory one
- db, err := newNodeDB(nodeDBPath, Version, ourID)
- if err != nil {
- return nil, err
- }
- tab := &Table{
- net: t,
- db: db,
- self: NewNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)),
- bonding: make(map[NodeID]*bondproc),
- bondslots: make(chan struct{}, maxBondingPingPongs),
- refreshReq: make(chan chan struct{}),
- closeReq: make(chan struct{}),
- closed: make(chan struct{}),
- }
- for i := 0; i < cap(tab.bondslots); i++ {
- tab.bondslots <- struct{}{}
- }
- for i := range tab.buckets {
- tab.buckets[i] = new(bucket)
- }
- go tab.refreshLoop()
- return tab, nil
- }
- // Self returns the local node.
- // The returned node should not be modified by the caller.
- func (tab *Table) Self() *Node {
- return tab.self
- }
- // ReadRandomNodes fills the given slice with random nodes from the
- // table. It will not write the same node more than once. The nodes in
- // the slice are copies and can be modified by the caller.
- func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
- tab.mutex.Lock()
- defer tab.mutex.Unlock()
- // TODO: tree-based buckets would help here
- // Find all non-empty buckets and get a fresh slice of their entries.
- var buckets [][]*Node
- for _, b := range tab.buckets {
- if len(b.entries) > 0 {
- buckets = append(buckets, b.entries[:])
- }
- }
- if len(buckets) == 0 {
- return 0
- }
- // Shuffle the buckets.
- for i := uint32(len(buckets)) - 1; i > 0; i-- {
- j := randUint(i)
- buckets[i], buckets[j] = buckets[j], buckets[i]
- }
- // Move head of each bucket into buf, removing buckets that become empty.
- var i, j int
- for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) {
- b := buckets[j]
- buf[i] = &(*b[0])
- buckets[j] = b[1:]
- if len(b) == 1 {
- buckets = append(buckets[:j], buckets[j+1:]...)
- }
- if len(buckets) == 0 {
- break
- }
- }
- return i + 1
- }
// randUint returns a random value in [0, max) derived from four bytes
// of crypto/rand output. It returns 0 when max is 0.
func randUint(max uint32) uint32 {
	if max == 0 {
		return 0
	}
	raw := make([]byte, 4)
	rand.Read(raw)
	return binary.BigEndian.Uint32(raw) % max
}
- // Close terminates the network listener and flushes the node database.
- func (tab *Table) Close() {
- select {
- case <-tab.closed:
- // already closed.
- case tab.closeReq <- struct{}{}:
- <-tab.closed // wait for refreshLoop to end.
- }
- }
- // SetFallbackNodes sets the initial points of contact. These nodes
- // are used to connect to the network if the table is empty and there
- // are no known nodes in the database.
- func (tab *Table) SetFallbackNodes(nodes []*Node) error {
- for _, n := range nodes {
- if err := n.validateComplete(); err != nil {
- return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err)
- }
- }
- tab.mutex.Lock()
- tab.nursery = make([]*Node, 0, len(nodes))
- for _, n := range nodes {
- cpy := *n
- // Recompute cpy.sha because the node might not have been
- // created by NewNode or ParseNode.
- cpy.sha = crypto.Keccak256Hash(n.ID[:])
- tab.nursery = append(tab.nursery, &cpy)
- }
- tab.mutex.Unlock()
- tab.refresh()
- return nil
- }
- // Resolve searches for a specific node with the given ID.
- // It returns nil if the node could not be found.
- func (tab *Table) Resolve(targetID NodeID) *Node {
- // If the node is present in the local table, no
- // network interaction is required.
- hash := crypto.Keccak256Hash(targetID[:])
- tab.mutex.Lock()
- cl := tab.closest(hash, 1)
- tab.mutex.Unlock()
- if len(cl.entries) > 0 && cl.entries[0].ID == targetID {
- return cl.entries[0]
- }
- // Otherwise, do a network lookup.
- result := tab.Lookup(targetID)
- for _, n := range result {
- if n.ID == targetID {
- return n
- }
- }
- return nil
- }
- // Lookup performs a network search for nodes close
- // to the given target. It approaches the target by querying
- // nodes that are closer to it on each iteration.
- // The given target does not need to be an actual node
- // identifier.
- func (tab *Table) Lookup(targetID NodeID) []*Node {
- return tab.lookup(targetID, true)
- }
- func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool) []*Node {
- var (
- target = crypto.Keccak256Hash(targetID[:])
- asked = make(map[NodeID]bool)
- seen = make(map[NodeID]bool)
- reply = make(chan []*Node, alpha)
- pendingQueries = 0
- result *nodesByDistance
- )
- // don't query further if we hit ourself.
- // unlikely to happen often in practice.
- asked[tab.self.ID] = true
- for {
- tab.mutex.Lock()
- // generate initial result set
- result = tab.closest(target, bucketSize)
- tab.mutex.Unlock()
- if len(result.entries) > 0 || !refreshIfEmpty {
- break
- }
- // The result set is empty, all nodes were dropped, refresh.
- // We actually wait for the refresh to complete here. The very
- // first query will hit this case and run the bootstrapping
- // logic.
- <-tab.refresh()
- refreshIfEmpty = false
- }
- for {
- // ask the alpha closest nodes that we haven't asked yet
- for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ {
- n := result.entries[i]
- if !asked[n.ID] {
- asked[n.ID] = true
- pendingQueries++
- go func() {
- // Find potential neighbors to bond with
- r, err := tab.net.findnode(n.ID, n.addr(), targetID)
- if err != nil {
- // Bump the failure counter to detect and evacuate non-bonded entries
- fails := tab.db.findFails(n.ID) + 1
- tab.db.updateFindFails(n.ID, fails)
- glog.V(logger.Detail).Infof("Bumping failures for %x: %d", n.ID[:8], fails)
- if fails >= maxFindnodeFailures {
- glog.V(logger.Detail).Infof("Evacuating node %x: %d findnode failures", n.ID[:8], fails)
- tab.delete(n)
- }
- }
- reply <- tab.bondall(r)
- }()
- }
- }
- if pendingQueries == 0 {
- // we have asked all closest nodes, stop the search
- break
- }
- // wait for the next reply
- for _, n := range <-reply {
- if n != nil && !seen[n.ID] {
- seen[n.ID] = true
- result.push(n, bucketSize)
- }
- }
- pendingQueries--
- }
- return result.entries
- }
- func (tab *Table) refresh() <-chan struct{} {
- done := make(chan struct{})
- select {
- case tab.refreshReq <- done:
- case <-tab.closed:
- close(done)
- }
- return done
- }
- // refreshLoop schedules doRefresh runs and coordinates shutdown.
- func (tab *Table) refreshLoop() {
- var (
- timer = time.NewTicker(autoRefreshInterval)
- waiting []chan struct{} // accumulates waiting callers while doRefresh runs
- done chan struct{} // where doRefresh reports completion
- )
- loop:
- for {
- select {
- case <-timer.C:
- if done == nil {
- done = make(chan struct{})
- go tab.doRefresh(done)
- }
- case req := <-tab.refreshReq:
- waiting = append(waiting, req)
- if done == nil {
- done = make(chan struct{})
- go tab.doRefresh(done)
- }
- case <-done:
- for _, ch := range waiting {
- close(ch)
- }
- waiting = nil
- done = nil
- case <-tab.closeReq:
- break loop
- }
- }
- if tab.net != nil {
- tab.net.close()
- }
- if done != nil {
- <-done
- }
- for _, ch := range waiting {
- close(ch)
- }
- tab.db.close()
- close(tab.closed)
- }
- // doRefresh performs a lookup for a random target to keep buckets
- // full. seed nodes are inserted if the table is empty (initial
- // bootstrap or discarded faulty peers).
- func (tab *Table) doRefresh(done chan struct{}) {
- defer close(done)
- // The Kademlia paper specifies that the bucket refresh should
- // perform a lookup in the least recently used bucket. We cannot
- // adhere to this because the findnode target is a 512bit value
- // (not hash-sized) and it is not easily possible to generate a
- // sha3 preimage that falls into a chosen bucket.
- // We perform a lookup with a random target instead.
- var target NodeID
- rand.Read(target[:])
- result := tab.lookup(target, false)
- if len(result) > 0 {
- return
- }
- // The table is empty. Load nodes from the database and insert
- // them. This should yield a few previously seen nodes that are
- // (hopefully) still alive.
- seeds := tab.db.querySeeds(seedCount, seedMaxAge)
- seeds = tab.bondall(append(seeds, tab.nursery...))
- if glog.V(logger.Debug) {
- if len(seeds) == 0 {
- glog.Infof("no seed nodes found")
- }
- for _, n := range seeds {
- age := time.Since(tab.db.lastPong(n.ID))
- glog.Infof("seed node (age %v): %v", age, n)
- }
- }
- tab.mutex.Lock()
- tab.stuff(seeds)
- tab.mutex.Unlock()
- // Finally, do a self lookup to fill up the buckets.
- tab.lookup(tab.self.ID, false)
- }
- // closest returns the n nodes in the table that are closest to the
- // given id. The caller must hold tab.mutex.
- func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
- // This is a very wasteful way to find the closest nodes but
- // obviously correct. I believe that tree-based buckets would make
- // this easier to implement efficiently.
- close := &nodesByDistance{target: target}
- for _, b := range tab.buckets {
- for _, n := range b.entries {
- close.push(n, nresults)
- }
- }
- return close
- }
- func (tab *Table) len() (n int) {
- for _, b := range tab.buckets {
- n += len(b.entries)
- }
- return n
- }
- // bondall bonds with all given nodes concurrently and returns
- // those nodes for which bonding has probably succeeded.
- func (tab *Table) bondall(nodes []*Node) (result []*Node) {
- rc := make(chan *Node, len(nodes))
- for i := range nodes {
- go func(n *Node) {
- nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCP))
- rc <- nn
- }(nodes[i])
- }
- for _ = range nodes {
- if n := <-rc; n != nil {
- result = append(result, n)
- }
- }
- return result
- }
- // bond ensures the local node has a bond with the given remote node.
- // It also attempts to insert the node into the table if bonding succeeds.
- // The caller must not hold tab.mutex.
- //
- // A bond is must be established before sending findnode requests.
- // Both sides must have completed a ping/pong exchange for a bond to
- // exist. The total number of active bonding processes is limited in
- // order to restrain network use.
- //
- // bond is meant to operate idempotently in that bonding with a remote
- // node which still remembers a previously established bond will work.
- // The remote node will simply not send a ping back, causing waitping
- // to time out.
- //
- // If pinged is true, the remote node has just pinged us and one half
- // of the process can be skipped.
- func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
- // Retrieve a previously known node and any recent findnode failures
- node, fails := tab.db.node(id), 0
- if node != nil {
- fails = tab.db.findFails(id)
- }
- // If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
- var result error
- age := time.Since(tab.db.lastPong(id))
- if node == nil || fails > 0 || age > nodeDBNodeExpiration {
- glog.V(logger.Detail).Infof("Bonding %x: known=%t, fails=%d age=%v", id[:8], node != nil, fails, age)
- tab.bondmu.Lock()
- w := tab.bonding[id]
- if w != nil {
- // Wait for an existing bonding process to complete.
- tab.bondmu.Unlock()
- <-w.done
- } else {
- // Register a new bonding process.
- w = &bondproc{done: make(chan struct{})}
- tab.bonding[id] = w
- tab.bondmu.Unlock()
- // Do the ping/pong. The result goes into w.
- tab.pingpong(w, pinged, id, addr, tcpPort)
- // Unregister the process after it's done.
- tab.bondmu.Lock()
- delete(tab.bonding, id)
- tab.bondmu.Unlock()
- }
- // Retrieve the bonding results
- result = w.err
- if result == nil {
- node = w.n
- }
- }
- if node != nil {
- // Add the node to the table even if the bonding ping/pong
- // fails. It will be relaced quickly if it continues to be
- // unresponsive.
- tab.add(node)
- tab.db.updateFindFails(id, 0)
- }
- return node, result
- }
- func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
- // Request a bonding slot to limit network usage
- <-tab.bondslots
- defer func() { tab.bondslots <- struct{}{} }()
- // Ping the remote side and wait for a pong.
- if w.err = tab.ping(id, addr); w.err != nil {
- close(w.done)
- return
- }
- if !pinged {
- // Give the remote node a chance to ping us before we start
- // sending findnode requests. If they still remember us,
- // waitping will simply time out.
- tab.net.waitping(id)
- }
- // Bonding succeeded, update the node database.
- w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort)
- tab.db.updateNode(w.n)
- close(w.done)
- }
- // ping a remote endpoint and wait for a reply, also updating the node
- // database accordingly.
- func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
- tab.db.updateLastPing(id, time.Now())
- if err := tab.net.ping(id, addr); err != nil {
- return err
- }
- tab.db.updateLastPong(id, time.Now())
- // Start the background expiration goroutine after the first
- // successful communication. Subsequent calls have no effect if it
- // is already running. We do this here instead of somewhere else
- // so that the search for seed nodes also considers older nodes
- // that would otherwise be removed by the expiration.
- tab.db.ensureExpirer()
- return nil
- }
- // add attempts to add the given node its corresponding bucket. If the
- // bucket has space available, adding the node succeeds immediately.
- // Otherwise, the node is added if the least recently active node in
- // the bucket does not respond to a ping packet.
- //
- // The caller must not hold tab.mutex.
- func (tab *Table) add(new *Node) {
- b := tab.buckets[logdist(tab.self.sha, new.sha)]
- tab.mutex.Lock()
- defer tab.mutex.Unlock()
- if b.bump(new) {
- return
- }
- var oldest *Node
- if len(b.entries) == bucketSize {
- oldest = b.entries[bucketSize-1]
- if oldest.contested {
- // The node is already being replaced, don't attempt
- // to replace it.
- return
- }
- oldest.contested = true
- // Let go of the mutex so other goroutines can access
- // the table while we ping the least recently active node.
- tab.mutex.Unlock()
- err := tab.ping(oldest.ID, oldest.addr())
- tab.mutex.Lock()
- oldest.contested = false
- if err == nil {
- // The node responded, don't replace it.
- return
- }
- }
- added := b.replace(new, oldest)
- if added && tab.nodeAddedHook != nil {
- tab.nodeAddedHook(new)
- }
- }
- // stuff adds nodes the table to the end of their corresponding bucket
- // if the bucket is not full. The caller must hold tab.mutex.
- func (tab *Table) stuff(nodes []*Node) {
- outer:
- for _, n := range nodes {
- if n.ID == tab.self.ID {
- continue // don't add self
- }
- bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
- for i := range bucket.entries {
- if bucket.entries[i].ID == n.ID {
- continue outer // already in bucket
- }
- }
- if len(bucket.entries) < bucketSize {
- bucket.entries = append(bucket.entries, n)
- if tab.nodeAddedHook != nil {
- tab.nodeAddedHook(n)
- }
- }
- }
- }
- // delete removes an entry from the node table (used to evacuate
- // failed/non-bonded discovery peers).
- func (tab *Table) delete(node *Node) {
- tab.mutex.Lock()
- defer tab.mutex.Unlock()
- bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
- for i := range bucket.entries {
- if bucket.entries[i].ID == node.ID {
- bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
- return
- }
- }
- }
- func (b *bucket) replace(n *Node, last *Node) bool {
- // Don't add if b already contains n.
- for i := range b.entries {
- if b.entries[i].ID == n.ID {
- return false
- }
- }
- // Replace last if it is still the last entry or just add n if b
- // isn't full. If is no longer the last entry, it has either been
- // replaced with someone else or became active.
- if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) {
- return false
- }
- if len(b.entries) < bucketSize {
- b.entries = append(b.entries, nil)
- }
- copy(b.entries[1:], b.entries)
- b.entries[0] = n
- return true
- }
- func (b *bucket) bump(n *Node) bool {
- for i := range b.entries {
- if b.entries[i].ID == n.ID {
- // move it to the front
- copy(b.entries[1:], b.entries[:i])
- b.entries[0] = n
- return true
- }
- }
- return false
- }
- // nodesByDistance is a list of nodes, ordered by
- // distance to target.
- type nodesByDistance struct {
- entries []*Node
- target common.Hash
- }
- // push adds the given node to the list, keeping the total size below maxElems.
- func (h *nodesByDistance) push(n *Node, maxElems int) {
- ix := sort.Search(len(h.entries), func(i int) bool {
- return distcmp(h.target, h.entries[i].sha, n.sha) > 0
- })
- if len(h.entries) < maxElems {
- h.entries = append(h.entries, n)
- }
- if ix == len(h.entries) {
- // farther away than all nodes we already have.
- // if there was room for it, the node is now the last element.
- } else {
- // slide existing entries down to make room
- // this will overwrite the entry we just appended.
- copy(h.entries[ix+1:], h.entries[ix:])
- h.entries[ix] = n
- }
- }
|