* NetDB: Try again to fix ISJ deadlock, thx devzero

This commit is contained in:
zzz
2011-09-04 20:26:47 +00:00
parent 42fc22eec9
commit 63958df99b
5 changed files with 106 additions and 59 deletions

View File

@@ -1,14 +1,18 @@
2011-09-04 zzz
* NetDB: Try again to fix ISJ deadlock, thx devzero
* Transport: Remove one global lock in OutboundMessageRegistry.
2011-09-03 zzz
* i2psnark: Fix "eject" button in certain browsers (ticket #511)
* UDP Inbound:
- Hopefully fix race NPE, thx dream
- Hopefully fix race NPE, thx devzero
- Catch some more fragment errors
- Exception and log tweaks
- Cleanups and javadocs
2011-09-02 zzz
* Console: Cache user-agent processing
* NetDB: Hopefully fix ISJ deadlock, thx dream
* NetDB: Hopefully fix ISJ deadlock, thx devzero
2011-09-02 sponge
* I2PSnark: Fix GUI html tag for adding a torrent, it was missing a space.

View File

@@ -22,6 +22,11 @@ public interface MessageSelector {
* If this returns true, the job specified by OutNetMessage.getOnReplyJob()
* will be run for every OutNetMessage associated with this selector
* (by InNetMessagePool), after calling setMessage() for that ReplyJob.
*
* WARNING this is called from within OutboundMessageSelector.getOriginalMessages()
* inside a lock and can lead to deadlocks if the selector does too much in isMatch().
* Until the lock is removed, take care to keep it simple.
*
*/
public boolean isMatch(I2NPMessage message);

View File

@@ -18,7 +18,7 @@ public class RouterVersion {
/** deprecated */
public final static String ID = "Monotone";
public final static String VERSION = CoreVersion.VERSION;
public final static long BUILD = 10;
public final static long BUILD = 11;
/** for example "-test" */
public final static String EXTRA = "";

View File

@@ -0,0 +1,87 @@
package net.i2p.router.networkdb.kademlia;
import net.i2p.data.Hash;
import net.i2p.data.RouterInfo;
import net.i2p.data.i2np.DatabaseSearchReplyMessage;
import net.i2p.router.JobImpl;
import net.i2p.router.RouterContext;
/**
* Ask the peer who sent us the DSRM for the RouterInfos...
*
* ... but If we have the routerInfo already, try to refetch it from that router itself,
* (if the info is old or we don't think it is floodfill)
* which will help us establish that router as a good floodfill and speed our
* integration into the network.
*
* Very similar to SingleLookupJob.
* This was all in IterativeLookupSelector.isMatch() but it caused deadlocks
* with OutboundMessageRegistry.getOriginalMessages()
* at both _search.newPeerToTry() and _search.failed().
*
* @since 0.8.9
*/
class IterativeLookupJob extends JobImpl {
private final DatabaseSearchReplyMessage _dsrm;
private final IterativeSearchJob _search;
public IterativeLookupJob(RouterContext ctx, DatabaseSearchReplyMessage dsrm, IterativeSearchJob search) {
super(ctx);
_dsrm = dsrm;
_search = search;
}
public void runJob() {
// TODO - dsrm.getFromHash() can't be trusted - check against the list of
// those we sent the search to in _search ?
Hash from = _dsrm.getFromHash();
// Chase the hashes from the reply
// 255 max, see comments in SingleLookupJob
int limit = Math.min(_dsrm.getNumReplies(), SingleLookupJob.MAX_TO_FOLLOW);
int newPeers = 0;
int oldPeers = 0;
int invalidPeers = 0;
for (int i = 0; i < limit; i++) {
Hash peer = _dsrm.getReply(i);
if (peer.equals(getContext().routerHash())) {
// us
oldPeers++;
continue;
}
if (peer.equals(from)) {
// wtf
invalidPeers++;
continue;
}
RouterInfo ri = getContext().netDb().lookupRouterInfoLocally(peer);
if (ri == null) {
// get the RI from the peer that told us about it
getContext().jobQueue().addJob(new IterativeFollowupJob(getContext(), peer, from, _search));
newPeers++;
} else if (ri.getPublished() < getContext().clock().now() - 60*60*1000 ||
!FloodfillNetworkDatabaseFacade.isFloodfill(ri)) {
// get an updated RI from the (now ff?) peer
getContext().jobQueue().addJob(new IterativeFollowupJob(getContext(), peer, peer, _search));
oldPeers++;
} else {
// add it to the sorted queue
// this will check if we have already tried it
// should we really add? if we know about it but skipped it,
// it was for some reason?
_search.newPeerToTry(peer);
oldPeers++;
}
}
long timeSent = _search.timeSent(from);
// assume 0 dup
if (timeSent > 0) {
getContext().profileManager().dbLookupReply(from, newPeers, oldPeers, invalidPeers, 0,
getContext().clock().now() - timeSent);
}
_search.failed(_dsrm.getFromHash(), false);
}
public String getName() { return "NetDb process DSRM"; }
}

View File

@@ -1,7 +1,6 @@
package net.i2p.router.networkdb.kademlia;
import net.i2p.data.Hash;
import net.i2p.data.RouterInfo;
import net.i2p.data.i2np.DatabaseSearchReplyMessage;
import net.i2p.data.i2np.DatabaseStoreMessage;
import net.i2p.data.i2np.I2NPMessage;
@@ -10,8 +9,7 @@ import net.i2p.router.RouterContext;
import net.i2p.util.Log;
/**
* Slightly modified version of FloodOnlyLookupSelector,
* plus it incorporates the functions of SingleLookupJob inline.
* Slightly modified version of FloodOnlyLookupSelector.
* Always follows the DSRM entries.
*
* @since 0.8.9
@@ -50,62 +48,15 @@ class IterativeLookupSelector implements MessageSelector {
} else if (message instanceof DatabaseSearchReplyMessage) {
DatabaseSearchReplyMessage dsrm = (DatabaseSearchReplyMessage)message;
if (_search.getKey().equals(dsrm.getSearchKey())) {
// TODO - dsrm.getFromHash() can't be trusted - check against the list of
// those we sent the search to in _search ?
Hash from = dsrm.getFromHash();
// Moved from FloodOnlyLookupMatchJob so it is called for all replies
// rather than just the last one
// Got a netDb reply pointing us at other floodfills...
if (_log.shouldLog(Log.INFO))
_log.info(_search.getJobId() + ": Processing DSRM via IterativeLookupJobs, apparently from " + from);
// Chase the hashes from the reply
// 255 max, see comments in SingleLookupJob
int limit = Math.min(dsrm.getNumReplies(), SingleLookupJob.MAX_TO_FOLLOW);
int newPeers = 0;
int oldPeers = 0;
int invalidPeers = 0;
for (int i = 0; i < limit; i++) {
Hash peer = dsrm.getReply(i);
if (peer.equals(_context.routerHash())) {
// us
oldPeers++;
continue;
}
if (peer.equals(from)) {
// wtf
invalidPeers++;
continue;
}
RouterInfo ri = _context.netDb().lookupRouterInfoLocally(peer);
if (ri == null) {
// get the RI from the peer that told us about it
_context.jobQueue().addJob(new IterativeFollowupJob(_context, peer, from, _search));
newPeers++;
} else if (ri.getPublished() < _context.clock().now() - 60*60*1000 ||
!FloodfillNetworkDatabaseFacade.isFloodfill(ri)) {
// get an updated RI from the (now ff?) peer
_context.jobQueue().addJob(new IterativeFollowupJob(_context, peer, peer, _search));
oldPeers++;
} else {
// add it to the sorted queue
// this will check if we have already tried it
// should we really add? if we know about it but skipped it,
// it was for some reason?
_search.newPeerToTry(peer);
oldPeers++;
}
}
long timeSent = _search.timeSent(from);
// assume 0 dup
if (timeSent > 0) {
_context.profileManager().dbLookupReply(from, newPeers, oldPeers, invalidPeers, 0,
_context.clock().now() - timeSent);
if (_log.shouldLog(Log.INFO)) {
Hash from = dsrm.getFromHash();
_log.info(_search.getJobId() + ": Processing DSRM via IterativeLookupJob, apparently from " + from);
}
_search.failed(dsrm.getFromHash(), false);
// was inline, now in IterativeLookupJob due to deadlocks
_context.jobQueue().addJob(new IterativeLookupJob(_context, dsrm, _search));
// fall through, always return false, we do not wish the match job to be called
}
}