Changeset 42acdc3


Ignore:
Timestamp:
Jul 10, 2011 12:00:58 AM (9 years ago)
Author:
zzz <zzz@…>
Branches:
master
Children:
3455d3f
Parents:
4fd1800
Message:
  • Router Clock: First cut at recognizing and reacting to large system clock shifts by partially restarting the router. Also improve restarts initiated from config.jsp Tickets #465, #468, #494
  • UPnP: Wait for a while to ensure port removal at shutdown or restart
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • apps/routerconsole/java/src/net/i2p/router/web/ConfigNetHandler.java

    r4fd1800 r42acdc3  
    272272            hiddenSwitch();
    273273        } else if (restartRequired) {
    274             if (_context.hasWrapper()) {
     274            //if (_context.hasWrapper()) {
    275275                // Wow this dumps all conns immediately and really isn't nice
    276276                addFormNotice("Performing a soft restart");
    277277                _context.router().restart();
    278                 addFormNotice("Soft restart complete");
     278                // restart() returns immediately now
     279                //addFormNotice("Soft restart complete");
    279280
    280281                // Most of the time we aren't changing addresses, just enabling or disabling
     
    286287                //_context.router().rebuildRouterInfo();
    287288                //addFormNotice("Router Info rebuilt");
    288             } else {
     289            //} else {
    289290                // There's a few changes that don't really require restart (e.g. enabling inbound TCP)
    290291                // But it would be hard to get right, so just do a restart.
    291                 addFormError(_("Gracefully restarting I2P to change published router address"));
    292                 if (_context.hasWrapper())
    293                     _context.addShutdownTask(new ConfigServiceHandler.UpdateWrapperManagerTask(Router.EXIT_GRACEFUL_RESTART));
    294                 _context.router().shutdownGracefully(Router.EXIT_GRACEFUL_RESTART);
    295             }
     292                //addFormError(_("Gracefully restarting I2P to change published router address"));
     293                //_context.router().shutdownGracefully(Router.EXIT_GRACEFUL_RESTART);
     294            //}
    296295        }
    297296    }
  • core/java/src/net/i2p/time/Timestamper.java

    r4fd1800 r42acdc3  
    1818 */
    1919public class Timestamper implements Runnable {
    20     private I2PAppContext _context;
     20    private final I2PAppContext _context;
    2121    private Log _log;
    2222    private final List<String> _servers;
     
    2727    private int _consecutiveFails;
    2828    private volatile boolean _disabled;
    29     private boolean _daemon;
     29    private final boolean _daemon;
    3030    private boolean _initialized;
    3131    private boolean _wellSynced;
     
    6161        _servers = new ArrayList(3);
    6262        _listeners = new CopyOnWriteArrayList();
     63        _context = ctx;
     64        _daemon = daemon;
     65        // DO NOT initialize _log here, stack overflow via LogManager init loop
     66
    6367        // Don't bother starting a thread if we are disabled.
    6468        // This means we no longer check every 5 minutes to see if we got enabled,
     
    7074            return;
    7175        }
    72         _context = ctx;
    73         _daemon = daemon;
    74         _initialized = false;
    75         _wellSynced = false;
    7676        if (lsnr != null)
    7777            _listeners.add(lsnr);
     
    123123            }
    124124        } catch (InterruptedException ie) {}
     125    }
     126   
     127    /**
     128     *  Update the time immediately.
     129     *  @since 0.8.8
     130     */
     131    public void timestampNow() {
     132         if (_initialized && _isRunning && (!_disabled) && _timestamperThread != null)
     133             _timestamperThread.interrupt();
    125134    }
    126135   
  • core/java/src/net/i2p/util/Clock.java

    r4fd1800 r42acdc3  
    11package net.i2p.util;
    22
    3 import java.util.Iterator;
    43import java.util.Set;
    54import java.util.concurrent.CopyOnWriteArraySet;
     
    2221    protected final I2PAppContext _context;
    2322    private final Timestamper _timestamper;
    24     protected final long _startedOn;
     23    protected long _startedOn;
    2524    protected boolean _statCreated;
    2625    protected volatile long _offset;
    2726    protected boolean _alreadyChanged;
    28     private final Set _listeners;
     27    private final Set<ClockUpdateListener> _listeners;
    2928   
    3029    public Clock(I2PAppContext context) {
     
    3433        _startedOn = System.currentTimeMillis();
    3534    }
     35
    3636    public static Clock getInstance() {
    3737        return I2PAppContext.getGlobalContext().clock();
     
    152152
    153153    protected void fireOffsetChanged(long delta) {
    154             for (Iterator iter = _listeners.iterator(); iter.hasNext();) {
    155                 ClockUpdateListener lsnr = (ClockUpdateListener) iter.next();
     154            for (ClockUpdateListener lsnr : _listeners) {
    156155                lsnr.offsetChanged(delta);
    157156            }
    158157    }
    159158
    160     public static interface ClockUpdateListener {
     159    public interface ClockUpdateListener {
     160
     161        /**
     162         *  @param delta = (new offset - old offset),
     163         *         where each offset = (now() - System.currentTimeMillis())
     164         */
    161165        public void offsetChanged(long delta);
    162166    }
  • router/java/src/net/i2p/router/Router.java

    r4fd1800 r42acdc3  
    5959 *
    6060 */
    61 public class Router {
     61public class Router implements RouterClock.ClockShiftListener {
    6262    private Log _log;
    6363    private RouterContext _context;
     
    7171    //private SessionKeyPersistenceHelper _sessionKeyPersistenceHelper;
    7272    private boolean _killVMOnEnd;
    73     private boolean _isAlive;
     73    private volatile boolean _isAlive;
    7474    private int _gracefulExitCode;
    7575    private I2PThread.OOMEventListener _oomListener;
     
    352352     * True if the router has tried to communicate with another router who is running a higher
    353353     * incompatible protocol version. 
    354      *
     354     * @deprecated unused
    355355     */
    356356    public boolean getHigherVersionSeen() { return _higherVersionSeen; }
     357
     358    /**
     359     * True if the router has tried to communicate with another router who is running a higher
     360     * incompatible protocol version. 
     361     * @deprecated unused
     362     */
    357363    public void setHigherVersionSeen(boolean seen) { _higherVersionSeen = seen; }
    358364   
     
    362368    public long getUptime() {
    363369        if ( (_context == null) || (_context.clock() == null) ) return 1; // racing on startup
    364         return _context.clock().now() - _context.clock().getOffset() - _started;
     370        return Math.max(1, _context.clock().now() - _context.clock().getOffset() - _started);
    365371    }
    366372   
     
    962968    public static final int EXIT_GRACEFUL_RESTART = 5;
    963969   
     970    /**
     971     *  Shutdown with no chance of cancellation
     972     */
    964973    public void shutdown(int exitCode) {
     974        ((RouterClock) _context.clock()).removeShiftListener(this);
    965975        _isAlive = false;
    966976        _context.random().saveSeed();
     
    12011211   
    12021212    /**
     1213     *  The clock shift listener.
     1214     *  Restart the router if we should.
     1215     *
     1216     *  @since 0.8.8
     1217     */
     1218    public void clockShift(long delta) {
     1219        if (gracefulShutdownInProgress() || !_isAlive)
     1220            return;
     1221        if (delta > -60*1000 && delta < 60*1000)
     1222            return;
     1223        if (_context.commSystem().countActivePeers() <= 0)
     1224            return;
     1225        if (delta > 0)
     1226            _log.error("Restarting after large clock shift forward by " + DataHelper.formatDuration(delta));
     1227        else
     1228            _log.error("Restarting after large clock shift backward by " + DataHelper.formatDuration(0 - delta));
     1229        restart();
     1230    }
     1231
     1232    /**
    12031233     *  A "soft" restart, primarily of the comm system, after
    12041234     *  a port change or large step-change in system time.
    12051235     *  Does not stop the whole JVM, so it is safe even in the absence
    12061236     *  of the wrapper.
    1207      *  This is not a graceful restart - all peer connections are dropped.
    1208      */
    1209     public void restart() {
     1237     *  This is not a graceful restart - all peer connections are dropped immediately.
     1238     *
     1239     *  As of 0.8.8, this returns immediately and does the actual restart in a separate thread.
     1240     *  Poll isAlive() if you need to know when the restart is complete.
     1241     */
     1242    public synchronized void restart() {
     1243        if (gracefulShutdownInProgress() || !_isAlive)
     1244            return;
     1245        ((RouterClock) _context.clock()).removeShiftListener(this);
    12101246        _isAlive = false;
    1211        
    1212         try { _context.commSystem().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the comm system", t); }
    1213         try { _context.clientManager().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the client manager", t); }
    1214         try { _context.tunnelManager().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the tunnel manager", t); }
    1215         try { _context.peerManager().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the peer manager", t); }
    1216         try { _context.netDb().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the networkDb", t); }
    1217        
    1218         //try { _context.jobQueue().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the job queue", t); }
    1219        
    1220         _log.log(Log.CRIT, "Restart teardown complete... ");
    1221         try { Thread.sleep(10*1000); } catch (InterruptedException ie) {}
    1222        
    1223         _log.log(Log.CRIT, "Restarting...");
    1224        
    1225         _isAlive = true;
    1226         _started = _context.clock().now();
    1227        
    1228         _log.log(Log.CRIT, "Restart complete");
     1247        Thread t = new Thread(new Restarter(), "Router Restart");
     1248        t.start();
     1249    }   
     1250
     1251    /**
     1252     *  @since 0.8.8
     1253     */
     1254    private class Restarter implements Runnable {
     1255        public void run() {
     1256            _started = _context.clock().now();
     1257            _log.error("Stopping the router for a restart...");
     1258            _log.logAlways(Log.WARN, "Stopping the client manager");
     1259            try { _context.clientManager().shutdown(); } catch (Throwable t) { _log.log(Log.CRIT, "Error stopping the client manager", t); }
     1260            _log.logAlways(Log.WARN, "Stopping the comm system");
     1261            try { _context.commSystem().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the comm system", t); }
     1262            _log.logAlways(Log.WARN, "Stopping the tunnel manager");
     1263            try { _context.tunnelManager().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the tunnel manager", t); }
     1264
     1265            //try { _context.peerManager().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the peer manager", t); }
     1266            //try { _context.netDb().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the networkDb", t); }
     1267            //try { _context.jobQueue().restart(); } catch (Throwable t) { _log.log(Log.CRIT, "Error restarting the job queue", t); }
     1268       
     1269            _log.logAlways(Log.WARN, "Router teardown complete, restarting the router...");
     1270            try { Thread.sleep(10*1000); } catch (InterruptedException ie) {}
     1271       
     1272            _log.logAlways(Log.WARN, "Restarting the comm system");
     1273            _log.logAlways(Log.WARN, "Restarting the tunnel manager");
     1274            _log.logAlways(Log.WARN, "Restarting the client manager");
     1275            try { _context.clientManager().startup(); } catch (Throwable t) { _log.log(Log.CRIT, "Error stopping the client manager", t); }
     1276       
     1277            _isAlive = true;
     1278            rebuildRouterInfo();
     1279       
     1280            _log.logAlways(Log.WARN, "Restart complete");
     1281            ((RouterClock) _context.clock()).addShiftListener(Router.this);
     1282        }
    12291283    }
    12301284   
  • router/java/src/net/i2p/router/RouterClock.java

    r4fd1800 r42acdc3  
    11package net.i2p.router;
    22
     3import java.util.Set;
     4import java.util.concurrent.CopyOnWriteArraySet;
     5
     6import net.i2p.data.DataHelper;
    37import net.i2p.util.Clock;
    48import net.i2p.util.Log;
     
    2731    public static final int DEFAULT_STRATUM = 8;
    2832    private static final int WORST_STRATUM = 16;
     33
    2934    /** the max NTP Timestamper delay is 30m right now, make this longer than that */
    3035    private static final long MIN_DELAY_FOR_WORSE_STRATUM = 45*60*1000;
     
    3540    private int _lastStratum;
    3641
    37     private final RouterContext _contextRC;
     42    /**
     43     *  If the system clock shifts by this much (positive or negative),
     44     *  call the callback, we probably need a soft restart.
     45     *  @since 0.8.8
     46     */
     47    private static final long MASSIVE_SHIFT = 75*1000;
     48    private final Set<ClockShiftListener> _shiftListeners;
     49    private volatile long _lastShiftNanos;
    3850
    3951    public RouterClock(RouterContext context) {
    4052        super(context);
    41         _contextRC = context;
    4253        _lastStratum = WORST_STRATUM;
     54        _lastSlewed = System.currentTimeMillis();
     55        _shiftListeners = new CopyOnWriteArraySet();
     56        _lastShiftNanos = System.nanoTime();
    4357    }
    4458
     
    99113           
    100114            // If so configured, check sanity of proposed clock offset
    101             if (_contextRC.getBooleanPropertyDefaultTrue("router.clockOffsetSanityCheck") &&
     115            if (_context.getBooleanPropertyDefaultTrue("router.clockOffsetSanityCheck") &&
    102116                _alreadyChanged) {
    103117
    104118                // Try calculating peer clock skew
    105                 long currentPeerClockSkew = _contextRC.commSystem().getFramedAveragePeerClockSkew(50);
     119                long currentPeerClockSkew = ((RouterContext)_context).commSystem().getFramedAveragePeerClockSkew(50);
    106120
    107121                    // Predict the effect of applying the proposed clock offset
     
    132146           
    133147            if (!_statCreated) {
    134                 _contextRC.statManager().createRequiredRateStat("clock.skew", "Clock step adjustment (ms)", "Clock", new long[] { 10*60*1000, 3*60*60*1000, 24*60*60*60 });
     148                _context.statManager().createRequiredRateStat("clock.skew", "Clock step adjustment (ms)", "Clock", new long[] { 10*60*1000, 3*60*60*1000, 24*60*60*60 });
    135149                _statCreated = true;
    136150            }
    137             _contextRC.statManager().addRateData("clock.skew", delta, 0);
     151            _context.statManager().addRateData("clock.skew", delta, 0);
    138152            _desiredOffset = offsetMs;
    139153        } else {
     
    178192        // copy the global, so two threads don't both increment or decrement _offset
    179193        long offset = _offset;
    180         if (systemNow >= _lastSlewed + MAX_SLEW) {
     194        long sinceLastSlewed = systemNow - _lastSlewed;
     195        if (sinceLastSlewed >= MASSIVE_SHIFT ||
     196            sinceLastSlewed <= 0 - MASSIVE_SHIFT) {
     197            _lastSlewed = systemNow;
     198            notifyMassive(sinceLastSlewed);
     199        } else if (sinceLastSlewed >= MAX_SLEW) {
    181200            // copy the global
    182201            long desiredOffset = _desiredOffset;
     
    198217
    199218    /*
     219     *  A large system clock shift happened. Tell people about it.
     220     *
     221     *  @since 0.8.8
     222     */
     223    private void notifyMassive(long shift) {
     224        long nowNanos = System.nanoTime();
     225        // try to prevent dups, not guaranteed
     226        // nanoTime() isn't guaranteed to be monotonic either :(
     227        if (nowNanos < _lastShiftNanos + MASSIVE_SHIFT)
     228            return;
     229        _lastShiftNanos = nowNanos;
     230
     231        // reset these so the offset can be reset by the timestamper again
     232        _startedOn = System.currentTimeMillis();
     233        _alreadyChanged = false;
     234        getTimestamper().timestampNow();
     235
     236        if (shift > 0)
     237            getLog().log(Log.CRIT, "Large clock shift forward by " + DataHelper.formatDuration(shift));
     238        else
     239            getLog().log(Log.CRIT, "Large clock shift backward by " + DataHelper.formatDuration(0 - shift));
     240
     241        for (ClockShiftListener lsnr : _shiftListeners) {
     242            lsnr.clockShift(shift);
     243        }
     244    }
     245
     246    /*
     247     *  Get notified of massive System clock shifts, positive or negative -
     248     *  generally a minute or more.
     249     *  The adjusted (offset) clock changes by the same amount.
     250     *  The offset itself did not change.
     251     *  Warning - duplicate notifications may occur.
     252     *
     253     *  @since 0.8.8
     254     */
     255    public void addShiftListener(ClockShiftListener lsnr) {
     256            _shiftListeners.add(lsnr);
     257    }
     258
     259    /*
     260     *  @since 0.8.8
     261     */
     262    public void removeShiftListener(ClockShiftListener lsnr) {
     263            _shiftListeners.remove(lsnr);
     264    }
     265
     266    /*
     267     *  @since 0.8.8
     268     */
     269    public interface ClockShiftListener {
     270
     271        /**
     272         *  @param delta The system clock and adjusted clock just changed by this much,
     273         *               in milliseconds (approximately)
     274         */
     275        public void clockShift(long delta);
     276    }
     277
     278    /*
    200279     *  How far we still have to slew, for diagnostics
    201280     *  @since 0.7.12
     
    205284        return _desiredOffset - _offset;
    206285    }
    207    
    208286}
  • router/java/src/net/i2p/router/client/ClientManager.java

    r4fd1800 r42acdc3  
    9292        try { Thread.sleep(2*1000); } catch (InterruptedException ie) {}
    9393       
    94         int port = ClientManagerFacadeImpl.DEFAULT_PORT;
    95         String portStr = _ctx.router().getConfigSetting(ClientManagerFacadeImpl.PROP_CLIENT_PORT);
    96         if (portStr != null) {
    97             try {
    98                 port = Integer.parseInt(portStr);
    99             } catch (NumberFormatException nfe) {
    100                 _log.error("Error setting the port: " + portStr + " is not valid", nfe);
    101             }
    102         }
     94        int port = _ctx.getProperty(ClientManagerFacadeImpl.PROP_CLIENT_PORT,
     95                                    ClientManagerFacadeImpl.DEFAULT_PORT);
    10396        startListeners(port);
    10497    }
  • router/java/src/net/i2p/router/startup/BootCommSystemJob.java

    r4fd1800 r42acdc3  
    1212import net.i2p.router.JobImpl;
    1313import net.i2p.router.RouterContext;
     14import net.i2p.router.RouterClock;
    1415import net.i2p.util.Log;
    1516
     
    4647
    4748        getContext().jobQueue().addJob(new ReadConfigJob(getContext()));
     49        ((RouterClock) getContext().clock()).addShiftListener(getContext().router());
    4850    }
    4951       
  • router/java/src/net/i2p/router/transport/UPnP.java

    r4fd1800 r42acdc3  
    9292        }
    9393
     94        /**
     95         *  WARNING - Blocking up to 2 seconds
     96         */
    9497        public void terminate() {
     98                // this gets spun off in a thread...
    9599                unregisterPortMappings();
     100                // If we stop too early and we've forwarded multiple ports,
     101                // the later ones don't get unregistered
     102                int i = 0;
     103                while (i++ < 20 && !portsForwarded.isEmpty()) {
     104                        try {
     105                                Thread.sleep(100);
     106                        } catch (InterruptedException ie) {}
     107                }
    96108                super.stop();
    97109                _router = null;
     
    673685        private void unregisterPorts(Set<ForwardPort> portsToForwardNow) {
    674686                Thread t = new Thread(new UnregisterPortsThread(portsToForwardNow));
    675                 t.setName("UPnP Port Opener " + (++__id));
     687                t.setName("UPnP Port Closer " + (++__id));
    676688                t.setDaemon(true);
    677689                t.start();
Note: See TracChangeset for help on using the changeset viewer.