Opened 7 years ago

Last modified 3 years ago

#662 accepted enhancement

Logger that drops records on overflow

Reported by: Zlatin Balevsky
Owned by: zzz
Priority: maintenance
Milestone: eventually
Component: api/utils
Version: 0.9
Keywords: log logging dos ddos
Cc:
Parent Tickets:
Sensitive: no

Description

Since a malicious third party or a bug can cause the logger queue to reach capacity and in turn halt the entire router, it may be better to drop records instead of blocking until there is free space in the logger queue. The suggestion below is one possible implementation.

#
# old_revision [47f04ff21e8edd00134a0fd68219f86fd3caba36]
#
# patch "core/java/src/net/i2p/util/LogManager.java"
#  from [d19da14468eb6f6ba1a6e26e794cc69640a5a650]
#    to [a6b4630bf4f55643932dbebbeb67a07841d105d1]
# 
# patch "core/java/src/net/i2p/util/LogWriter.java"
#  from [5202d87296c86d2127b1a660a302e182a9b0eb25]
#    to [078385b6255ee65b662a822c9ca198ebb9605b10]
#
============================================================
--- core/java/src/net/i2p/util/LogManager.java	d19da14468eb6f6ba1a6e26e794cc69640a5a650
+++ core/java/src/net/i2p/util/LogManager.java	a6b4630bf4f55643932dbebbeb67a07841d105d1
@@ -19,11 +19,12 @@ import java.util.Properties;
 import java.util.List;
 import java.util.Locale;
 import java.util.Properties;
-import java.util.Queue;
 import java.util.Set;
 import java.util.TimeZone;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 
 import net.i2p.I2PAppContext;
 import net.i2p.data.DataHelper;
@@ -87,7 +88,15 @@ public class LogManager {
     /** who clears and writes our records */
     private LogWriter _writer;
 
+    /** How many log records were missed due to queue capacity */
+    private final AtomicLong _missedRecords = new AtomicLong(0);
     /** 
+     * Record indicating that the queue has reached capacity.  
+     * Accessed only from LogWriter thread
+     */
+    private LogRecord _overflowRecord;
+    
+    /** 
      * default log level for logs that aren't explicitly controlled 
      * through a LogLimit in _limits
      */
@@ -243,15 +252,8 @@ public class LogManager {
 
         boolean success = _records.offer(record);
         if (!success) {
-            // the writer waits 10 seconds *or* until we tell them to wake up
-            // before rereading the config and writing out any log messages
-            synchronized (_writer) {
-                _writer.notifyAll();
-            }
-            // block as a way of slowing down out-of-control loggers (a little)
-            try {
-                _records.put(record);
-            } catch (InterruptedException ie) {}
+            // record that a record was dropped without blocking
+        	_missedRecords.incrementAndGet();
         }
     }
     
@@ -619,12 +621,41 @@ public class LogManager {
     }
 
     /**
+     * @return a record to log only if one is present.  Does not care for overflow, etc.
+     */
+    LogRecord pollNextRecord() {
+    	return _records.poll();
+    }
+    
+    /**
      *  Zero-copy.
      *  For the LogWriter
      *  @since 0.8.2
      */
-    Queue<LogRecord> getQueue() {
-        return _records;
+    LogRecord getNextRecord(long interval) throws InterruptedException {
+    	
+    	LogRecord rv;
+    	
+    	// if we reached overflow last time log that.
+    	if (_overflowRecord != null) {
+    		rv = _overflowRecord;
+    		_overflowRecord = null;
+    		return rv;
+    	}
+    	
+    	// always free some space in the queue, otherwise may never recover from overflow
+    	rv = _records.poll(interval, TimeUnit.MILLISECONDS);
+    	
+    	// check if we dropped something
+    	long dropped = _missedRecords.getAndSet(0);
+    	if (dropped > 0) {
+    		// prepare an overflow record to be read next time
+    		_overflowRecord = new LogRecord(LogManager.class,
+    				"OVERFLOW", "LogWriter", Log.CRIT, 
+    				"Logger queue overflowed!  Records dropped: "+dropped, 
+    				null);
+    	}
+        return rv;
     }
 
     public char[] getFormat() {
@@ -677,8 +708,6 @@ public class LogManager {
                     Thread.sleep(250);
                 } catch (InterruptedException ie) {}
             }
-            // this could generate out-of-order messages
-            _writer.flushRecords(false);
             _writer.stopWriting();
             synchronized (_writer) {
                 _writer.notifyAll();
============================================================
--- core/java/src/net/i2p/util/LogWriter.java	5202d87296c86d2127b1a660a302e182a9b0eb25
+++ core/java/src/net/i2p/util/LogWriter.java	078385b6255ee65b662a822c9ca198ebb9605b10
@@ -14,7 +14,6 @@ import java.io.Writer;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.util.Queue;
 
 /**
  * Log writer thread that pulls log records from the LogManager, writes them to
@@ -35,6 +34,7 @@ class LogWriter implements Runnable {
     private final LogManager _manager;
 
     private volatile boolean _write;
+    private volatile Thread _me;
     private static final int MAX_DISKFULL_MESSAGES = 8;
     private int _diskFullMessageCount;
     
@@ -45,57 +45,46 @@ class LogWriter implements Runnable {
 
     public void stopWriting() {
         _write = false;
+        Thread me = _me;
+        if (me != null)
+        	me.interrupt();
     }
     
     public void run() {
         _write = true;
+        _me = Thread.currentThread();
         try {
-            // Don't rotate and open until needed
-            //rotateFile();
-            while (_write) {
-                flushRecords();
-                if (_write)
-                    rereadConfig();
-            }
-            //System.err.println("Done writing");
+        	flushRecords();
+        } catch (InterruptedException timeToDie) {
+        	if (_write)
+        		throw new IllegalStateException("interrupted, but not shutdown? ",timeToDie);
         } catch (Exception e) {
             System.err.println("Error writing the log: " + e);
             e.printStackTrace();
+        } finally {
+        	try {
+        		flushRemainingRecords();
+        	} finally {
+        		closeFile();
+        	}
         }
-        closeFile();
     }
+    
+    private void flushRemainingRecords() {
+    	LogRecord record;
+    	while((record = _manager.pollNextRecord()) != null)
+    		writeRecord(record);
+    }
 
-    public void flushRecords() { flushRecords(true); }
-    public void flushRecords(boolean shouldWait) {
-        try {
-            // zero copy, drain the manager queue directly
-            Queue<LogRecord> records = _manager.getQueue();
-            if (records == null) return;
-            if (!records.isEmpty()) {
-                LogRecord rec;
-                while ((rec = records.poll()) != null) {
-                    writeRecord(rec);
-                }
-                try {
-                    if (_currentOut != null)
-                        _currentOut.flush();
-                } catch (IOException ioe) {
-                    if (_write && ++_diskFullMessageCount < MAX_DISKFULL_MESSAGES)
-                        System.err.println("Error writing the router log - disk full? " + ioe);
-                }
-            }
-        } catch (Throwable t) {
-            t.printStackTrace();
-        } finally {
-            if (shouldWait) {
-                try { 
-                    synchronized (this) {
-                        this.wait(FLUSH_INTERVAL); 
-                    }
-                } catch (InterruptedException ie) { // nop
-                }
-            }
-        }
+    public void flushRecords() throws IOException,InterruptedException { 
+    	while(_write)  {
+    		LogRecord record = _manager.getNextRecord(FLUSH_INTERVAL);
+    		if (record != null)
+    			writeRecord(record);
+    		else
+    			_currentOut.flush();
+            rereadConfig();
+    	}
     }
     
     public String currentFile() {



Subtickets

Change History (3)

comment:1 Changed 7 years ago by zzz

Milestone: 0.9.2
Owner: set to zzz
Status: new → accepted

0.9.1-2 contains two new options, logger.logBufferSize (default 1024) and logger.dropOnOverflow (default false).

I don't think default true for dropOnOverflow is a good idea.

I also did not implement the log notice when logs are dropped. Leaving open until then.

comment:2 Changed 7 years ago by str4d

Milestone: 0.9.2

comment:3 Changed 3 years ago by zzz

Milestone: eventually

Very low priority since not default.

Things were refactored a while ago, but general approach to do this would be:

1) in LogManager, add a getDroppedRecordCount() method that does getAndSet(0)
2) in LogWriter.flushRecords(), call the new method and output a message, probably in a method similar to writeDupMessage()

Note: See TracTickets for help on using tickets.