浏览代码

les: fix retriever logic (#16776)

This PR fixes a retriever logic bug. When a peer had a soft timeout
and then a response arrived, it always assumed it was the same peer
even though it could have been a later requested one that did not time
out at all yet. In this case the logic went to an illegal state and
deadlocked, causing a goroutine leak.

Fixes #16243 and replaces #16359.
Thanks to @riceke for finding the bug in the logic.
Felföldi Zsolt 7 年之前
父节点
当前提交
25982375a8
共有 1 个文件被更改,包括 17 次插入13 次删除
  1. 17 13
      les/retrieve.go

+ 17 - 13
les/retrieve.go

@@ -69,9 +69,9 @@ type sentReq struct {
 	lock   sync.RWMutex // protect access to sentTo map
 	sentTo map[distPeer]sentReqToPeer
 
-	reqQueued    bool // a request has been queued but not sent
-	reqSent      bool // a request has been sent but not timed out
-	reqSrtoCount int  // number of requests that reached soft (but not hard) timeout
+	lastReqQueued bool     // last request has been queued but not sent
+	lastReqSentTo distPeer // if not nil then last request has been sent to given peer but not timed out
+	reqSrtoCount  int      // number of requests that reached soft (but not hard) timeout
 }
 
 // sentReqToPeer notifies the request-from-peer goroutine (tryRequest) about a response
@@ -180,7 +180,7 @@ type reqStateFn func() reqStateFn
 // retrieveLoop is the retrieval state machine event loop
 func (r *sentReq) retrieveLoop() {
 	go r.tryRequest()
-	r.reqQueued = true
+	r.lastReqQueued = true
 	state := r.stateRequesting
 
 	for state != nil {
@@ -214,7 +214,7 @@ func (r *sentReq) stateRequesting() reqStateFn {
 		case rpSoftTimeout:
 			// last request timed out, try asking a new peer
 			go r.tryRequest()
-			r.reqQueued = true
+			r.lastReqQueued = true
 			return r.stateRequesting
 		case rpDeliveredValid:
 			r.stop(nil)
@@ -233,7 +233,7 @@ func (r *sentReq) stateNoMorePeers() reqStateFn {
 	select {
 	case <-time.After(retryQueue):
 		go r.tryRequest()
-		r.reqQueued = true
+		r.lastReqQueued = true
 		return r.stateRequesting
 	case ev := <-r.eventsCh:
 		r.update(ev)
@@ -260,22 +260,26 @@ func (r *sentReq) stateStopped() reqStateFn {
 func (r *sentReq) update(ev reqPeerEvent) {
 	switch ev.event {
 	case rpSent:
-		r.reqQueued = false
-		if ev.peer != nil {
-			r.reqSent = true
-		}
+		r.lastReqQueued = false
+		r.lastReqSentTo = ev.peer
 	case rpSoftTimeout:
-		r.reqSent = false
+		r.lastReqSentTo = nil
 		r.reqSrtoCount++
-	case rpHardTimeout, rpDeliveredValid, rpDeliveredInvalid:
+	case rpHardTimeout:
 		r.reqSrtoCount--
+	case rpDeliveredValid, rpDeliveredInvalid:
+		if ev.peer == r.lastReqSentTo {
+			r.lastReqSentTo = nil
+		} else {
+			r.reqSrtoCount--
+		}
 	}
 }
 
 // waiting returns true if the retrieval mechanism is waiting for an answer from
 // any peer
 func (r *sentReq) waiting() bool {
-	return r.reqQueued || r.reqSent || r.reqSrtoCount > 0
+	return r.lastReqQueued || r.lastReqSentTo != nil || r.reqSrtoCount > 0
 }
 
 // tryRequest tries to send the request to a new peer and waits for it to either