From a623929ce3c2ff762752fa3f06599c8f58476697 Mon Sep 17 00:00:00 2001
From: Jon Wayne Parrott <jonwayne@google.com>
Date: Fri, 12 May 2017 13:30:48 -0700
Subject: [PATCH] Overhaul logging background thread transport (#3407)

---
 .../google/cloud/logging/handlers/handlers.py |   6 +-
 .../handlers/transports/background_thread.py  | 274 ++++++++++------
 logging/nox.py                                |   4 +-
 .../transports/test_background_thread.py      | 298 +++++++++++++-----
 4 files changed, 401 insertions(+), 181 deletions(-)

diff --git a/logging/google/cloud/logging/handlers/handlers.py b/logging/google/cloud/logging/handlers/handlers.py
index ae66c4516ee9..62ec6c6d561a 100644
--- a/logging/google/cloud/logging/handlers/handlers.py
+++ b/logging/google/cloud/logging/handlers/handlers.py
@@ -20,7 +20,11 @@
 
 DEFAULT_LOGGER_NAME = 'python'
 
-EXCLUDED_LOGGER_DEFAULTS = ('google.cloud', 'oauth2client')
+EXCLUDED_LOGGER_DEFAULTS = (
+    'google.cloud',
+    'google.auth',
+    'google_auth_httplib2',
+)
 
 
 class CloudLoggingHandler(logging.StreamHandler):
diff --git a/logging/google/cloud/logging/handlers/transports/background_thread.py b/logging/google/cloud/logging/handlers/transports/background_thread.py
index 9c8ea85c937a..4b651243be45 100644
--- a/logging/google/cloud/logging/handlers/transports/background_thread.py
+++ b/logging/google/cloud/logging/handlers/transports/background_thread.py
@@ -17,139 +17,231 @@
 Uses a background worker to log to Stackdriver Logging asynchronously.
 """
 
+from __future__ import print_function
+
 import atexit
 import copy
+import logging
 import threading
 
+from six.moves import range
+from six.moves import queue
+
 from google.cloud.logging.handlers.transports.base import Transport
 
-_WORKER_THREAD_NAME = 'google.cloud.logging.handlers.transport.Worker'
+_DEFAULT_GRACE_PERIOD = 5.0  # Seconds
+_DEFAULT_MAX_BATCH_SIZE = 10
+_WORKER_THREAD_NAME = 'google.cloud.logging.Worker'
+_WORKER_TERMINATOR = object()
+_LOGGER = logging.getLogger(__name__)
 
 
-class _Worker(object):
-    """A threaded worker that writes batches of log entries
+def _get_many(queue_, max_items=None):
+    """Get multiple items from a Queue.
 
-    Writes entries to the logger API.
+    Gets at least one (blocking) and at most ``max_items`` items
+    (non-blocking) from a given Queue. Does not mark the items as done.
 
-    This class reuses a single :class:`Batch` method to write successive
-    entries.
+    :type queue_: :class:`~queue.Queue`
+    :param queue_: The Queue to get items from.
 
-    Currently, the only public methods are constructing it (which also starts
-    it) and enqueuing :class:`Logger` (record, message) pairs.
+    :type max_items: int
+    :param max_items: The maximum number of items to get. If ``None``, then all
+        available items in the queue are returned.
+
+    :rtype: Sequence
+    :returns: A sequence of items retrieved from the queue.
     """
+    # Always return at least one item.
+    items = [queue_.get()]
+    while max_items is None or len(items) < max_items:
+        try:
+            items.append(queue_.get_nowait())
+        except queue.Empty:
+            break
+    return items
 
-    def __init__(self, logger):
-        self.started = False
-        self.stopping = False
-        self.stopped = False
 
-        # _entries_condition is used to signal from the main thread whether
-        # there are any waiting queued logger entries to be written
-        self._entries_condition = threading.Condition()
+class _Worker(object):
+    """A background thread that writes batches of log entries.
 
-        # _stop_condition is used to signal from the worker thread to the
-        # main thread that it's finished its last entries
-        self._stop_condition = threading.Condition()
+    :type cloud_logger: :class:`~google.cloud.logging.logger.Logger`
+    :param cloud_logger: The logger to send entries to.
 
-        # This object continually reuses the same :class:`Batch` object to
-        # write multiple entries at the same time.
-        self.logger = logger
-        self.batch = self.logger.batch()
+    :type grace_period: float
+    :param grace_period: The amount of time to wait for pending logs to
+        be submitted when the process is shutting down.
 
+    :type max_batch_size: int
+    :param max_batch_size: The maximum number of items to send at a time
+        in the background thread.
+    """
+
+    def __init__(self, cloud_logger, grace_period=_DEFAULT_GRACE_PERIOD,
+                 max_batch_size=_DEFAULT_MAX_BATCH_SIZE):
+        self._cloud_logger = cloud_logger
+        self._grace_period = grace_period
+        self._max_batch_size = max_batch_size
+        self._queue = queue.Queue(0)
+        self._operational_lock = threading.Lock()
         self._thread = None
 
-        # Number in seconds of  how long to wait for worker to send remaining
-        self._stop_timeout = 5
+    @property
+    def is_alive(self):
+        """Returns True is the background thread is running."""
+        return self._thread is not None and self._thread.is_alive()
 
-        self._start()
+    def _safely_commit_batch(self, batch):
+        total_logs = len(batch.entries)
 
-    def _run(self):
+        try:
+            if total_logs > 0:
+                batch.commit()
+                _LOGGER.debug('Submitted %d logs', total_logs)
+        except Exception:
+            _LOGGER.error(
+                'Failed to submit %d logs.', total_logs, exc_info=True)
+
+    def _thread_main(self):
         """The entry point for the worker thread.
 
-        Loops until ``stopping`` is set to :data:`True`, and commits batch
-        entries written during :meth:`enqueue`.
+        Pulls pending log entries off the queue and writes them in batches to
+        the Cloud Logger.
         """
-        try:
-            self._entries_condition.acquire()
-            self.started = True
-            while not self.stopping:
-                if len(self.batch.entries) == 0:
-                    # branch coverage of this code extremely flaky
-                    self._entries_condition.wait()  # pragma: NO COVER
-
-                if len(self.batch.entries) > 0:
-                    self.batch.commit()
-        finally:
-            self._entries_condition.release()
-
-        # main thread may be waiting for worker thread to finish writing its
-        # final entries. here we signal that it's done.
-        self._stop_condition.acquire()
-        self._stop_condition.notify()
-        self._stop_condition.release()
-
-    def _start(self):
-        """Called by this class's constructor
-
-        This method is responsible for starting the thread and registering
-        the exit handlers.
+        _LOGGER.debug('Background thread started.')
+
+        quit_ = False
+        while True:
+            batch = self._cloud_logger.batch()
+            items = _get_many(self._queue, max_items=self._max_batch_size)
+
+            for item in items:
+                if item is _WORKER_TERMINATOR:
+                    quit_ = True
+                    # Continue processing items, don't break, try to process
+                    # all items we got back before quitting.
+                else:
+                    batch.log_struct(**item)
+
+            self._safely_commit_batch(batch)
+
+            for _ in range(len(items)):
+                self._queue.task_done()
+
+            if quit_:
+                break
+
+        _LOGGER.debug('Background thread exited gracefully.')
+
+    def start(self):
+        """Starts the background thread.
+
+        Additionally, this registers a handler for process exit to attempt
+        to send any pending log entries before shutdown.
         """
-        try:
-            self._entries_condition.acquire()
+        with self._operational_lock:
+            if self.is_alive:
+                return
+
             self._thread = threading.Thread(
-                target=self._run, name=_WORKER_THREAD_NAME)
-            self._thread.setDaemon(True)
+                target=self._thread_main,
+                name=_WORKER_THREAD_NAME)
+            self._thread.daemon = True
             self._thread.start()
-        finally:
-            self._entries_condition.release()
-            atexit.register(self._stop)
+            atexit.register(self._main_thread_terminated)
+
+    def stop(self, grace_period=None):
+        """Signals the background thread to stop.
 
-    def _stop(self):
-        """Signals the worker thread to shut down
+        This does not terminate the background thread. It simply queues the
+        stop signal. If the main process exits before the background thread
+        processes the stop signal, it will be terminated without finishing
+        work. The ``grace_period`` parameter will give the background
+        thread some time to finish processing before this function returns.
 
-        Also waits for ``stop_timeout`` seconds for the worker to finish.
+        :type grace_period: float
+        :param grace_period: If specified, this method will block up to this
+            many seconds to allow the background thread to finish work before
+            returning.
 
-        This method is called by the ``atexit`` handler registered by
-         :meth:`start`.
+        :rtype: bool
+        :returns: True if the thread terminated. False if the thread is still
+            running.
         """
-        if not self.started or self.stopping:
-            return
+        if not self.is_alive:
+            return True
+
+        with self._operational_lock:
+            self._queue.put_nowait(_WORKER_TERMINATOR)
+
+            if grace_period is not None:
+                print('Waiting up to %d seconds.' % (grace_period,))
 
-        # lock the stop condition first so that the worker
-        # thread can't notify it's finished before we wait
-        self._stop_condition.acquire()
+            self._thread.join(timeout=grace_period)
 
-        # now notify the worker thread to shutdown
-        self._entries_condition.acquire()
-        self.stopping = True
-        self._entries_condition.notify()
-        self._entries_condition.release()
+            # Check this before disowning the thread, because after we disown
+            # the thread is_alive will be False regardless of if the thread
+            # exited or not.
+            success = not self.is_alive
 
-        # now wait for it to signal it's finished
-        self._stop_condition.wait(self._stop_timeout)
-        self._stop_condition.release()
-        self.stopped = True
+            self._thread = None
+
+            return success
+
+    def _main_thread_terminated(self):
+        """Callback that attempts to send pending logs before termination."""
+        if not self.is_alive:
+            return
+
+        if not self._queue.empty():
+            print(
+                'Program shutting down, attempting to send %d queued log '
+                'entries to Stackdriver Logging...' % (self._queue.qsize(),))
+
+        if self.stop(self._grace_period):
+            print('Sent all pending logs.')
+        else:
+            print('Failed to send %d pending logs.' % (self._queue.qsize(),))
 
     def enqueue(self, record, message):
-        """Queues up a log entry to be written by the background thread."""
-        try:
-            self._entries_condition.acquire()
-            if self.stopping:
-                return
-            info = {'message': message, 'python_logger': record.name}
-            self.batch.log_struct(info, severity=record.levelname)
-            self._entries_condition.notify()
-        finally:
-            self._entries_condition.release()
+        """Queues a log entry to be written by the background thread.
+
+        :type record: :class:`logging.LogRecord`
+        :param record: Python log record that the handler was called with.
+
+        :type message: str
+        :param message: The message from the ``LogRecord`` after being
+                        formatted by the associated log formatters.
+        """
+        self._queue.put_nowait({
+            'info': {
+                'message': message,
+                'python_logger': record.name,
+            },
+            'severity': record.levelname,
+        })
 
 
 class BackgroundThreadTransport(Transport):
-    """Aysnchronous transport that uses a background thread.
+    """Asynchronous transport that uses a background thread.
+
+    :type client: :class:`~google.cloud.logging.client.Client`
+    :param client: The Logging client.
+
+    :type name: str
+    :param name: the name of the logger.
+
+    :type grace_period: float
+    :param grace_period: The amount of time to wait for pending logs to
+        be submitted when the process is shutting down.
 
-    Writes logging entries as a batch process.
+    :type batch_size: int
+    :param batch_size: The maximum number of items to send at a time in the
+        background thread.
     """
 
-    def __init__(self, client, name):
+    def __init__(self, client, name, grace_period=_DEFAULT_GRACE_PERIOD,
+                 batch_size=_DEFAULT_MAX_BATCH_SIZE):
         http = copy.deepcopy(client._http)
         self.client = client.__class__(
             client.project, client._credentials, http)
diff --git a/logging/nox.py b/logging/nox.py
index 7f6447c56924..5d4751a955a5 100644
--- a/logging/nox.py
+++ b/logging/nox.py
@@ -39,7 +39,7 @@ def unit_tests(session, python_version):
         'py.test', '--quiet',
         '--cov=google.cloud.logging', '--cov=tests.unit', '--cov-append',
         '--cov-config=.coveragerc', '--cov-report=', '--cov-fail-under=97',
-        'tests/unit',
+        'tests/unit', *session.posargs
     )
 
 
@@ -63,7 +63,7 @@ def system_tests(session, python_version):
     session.install('.')
 
     # Run py.test against the system tests.
-    session.run('py.test', '-vvv', 'tests/system.py')
+    session.run('py.test', '-vvv', 'tests/system.py', *session.posargs)
 
 
 @nox.session
diff --git a/logging/tests/unit/handlers/transports/test_background_thread.py b/logging/tests/unit/handlers/transports/test_background_thread.py
index 3952a0b6422c..177c30e10863 100644
--- a/logging/tests/unit/handlers/transports/test_background_thread.py
+++ b/logging/tests/unit/handlers/transports/test_background_thread.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 
 import logging
-import time
 import unittest
 
+import mock
 
-class TestBackgroundThreadHandler(unittest.TestCase):
 
+class TestBackgroundThreadHandler(unittest.TestCase):
     PROJECT = 'PROJECT'
 
     @staticmethod
@@ -29,36 +29,41 @@ def _get_target_class():
         return BackgroundThreadTransport
 
     def _make_one(self, *args, **kw):
-        return self._get_target_class()(*args, **kw)
-
-    def test_ctor(self):
+        worker_patch = mock.patch(
+            'google.cloud.logging.handlers.transports.'
+            'background_thread._Worker',
+            autospec=True)
+        with worker_patch as worker_mock:
+            return self._get_target_class()(*args, **kw), worker_mock
+
+    def test_constructor(self):
         client = _Client(self.PROJECT)
-        NAME = 'python_logger'
-        transport = self._make_one(client, NAME)
-        self.assertEqual(transport.worker.logger.name, NAME)
+        name = 'python_logger'
+
+        transport, worker = self._make_one(client, name)
+
+        logger, = worker.call_args[0]  # call_args[0] is *args.
+        self.assertEqual(logger.name, name)
 
     def test_send(self):
         client = _Client(self.PROJECT)
-        NAME = 'python_logger'
-        transport = self._make_one(client, NAME)
-        transport.worker.batch = client.logger(NAME).batch()
+        name = 'python_logger'
+
+        transport, _ = self._make_one(client, name)
 
         python_logger_name = 'mylogger'
         message = 'hello world'
-        record = logging.LogRecord(python_logger_name, logging.INFO,
-                                   None, None, message, None, None)
+        record = logging.LogRecord(
+            python_logger_name, logging.INFO,
+            None, None, message, None, None)
+
         transport.send(record, message)
 
-        EXPECTED_STRUCT = {
-            'message': message,
-            'python_logger': python_logger_name
-        }
-        EXPECTED_SENT = (EXPECTED_STRUCT, 'INFO')
-        self.assertEqual(transport.worker.batch.log_struct_called_with,
-                         EXPECTED_SENT)
+        transport.worker.enqueue.assert_called_once_with(record, message)
 
 
-class TestWorker(unittest.TestCase):
+class Test_Worker(unittest.TestCase):
+    NAME = 'python_logger'
 
     @staticmethod
     def _get_target_class():
@@ -69,79 +74,188 @@ def _get_target_class():
     def _make_one(self, *args, **kw):
         return self._get_target_class()(*args, **kw)
 
-    def test_ctor(self):
-        NAME = 'python_logger'
-        logger = _Logger(NAME)
-        worker = self._make_one(logger)
-        self.assertEqual(worker.batch, logger._batch)
+    def _start_with_thread_patch(self, worker):
+        with mock.patch('threading.Thread', new=_Thread) as thread_mock:
+            with mock.patch('atexit.register') as atexit_mock:
+                worker.start()
+                return thread_mock, atexit_mock
 
-    def test_run(self):
-        NAME = 'python_logger'
-        logger = _Logger(NAME)
-        worker = self._make_one(logger)
+    def test_constructor(self):
+        logger = _Logger(self.NAME)
+        grace_period = 50
+        max_batch_size = 50
 
-        python_logger_name = 'mylogger'
-        message = 'hello world'
-        record = logging.LogRecord(python_logger_name,
-                                   logging.INFO, None, None,
-                                   message, None, None)
+        worker = self._make_one(
+            logger, grace_period=grace_period, max_batch_size=max_batch_size)
 
-        worker._start()
+        self.assertEqual(worker._cloud_logger, logger)
+        self.assertEqual(worker._grace_period, grace_period)
+        self.assertEqual(worker._max_batch_size, max_batch_size)
+        self.assertFalse(worker.is_alive)
+        self.assertIsNone(worker._thread)
 
-        # first sleep is for branch coverage - ensure condition
-        # where queue is empty occurs
-        time.sleep(1)
-        # second polling is to avoid starting/stopping worker
-        # before anything ran
-        while not worker.started:
-            time.sleep(1)  # pragma: NO COVER
+    def test_start(self):
+        from google.cloud.logging.handlers.transports import background_thread
 
-        worker.enqueue(record, message)
-        # Set timeout to none so worker thread finishes
-        worker._stop_timeout = None
-        worker._stop()
-        self.assertTrue(worker.batch.commit_called)
+        worker = self._make_one(_Logger(self.NAME))
 
-    def test_run_after_stopped(self):
-        # No-op
-        name = 'python_logger'
-        logger = _Logger(name)
-        worker = self._make_one(logger)
+        _, atexit_mock = self._start_with_thread_patch(worker)
 
-        python_logger_name = 'mylogger'
-        message = 'hello world'
-        record = logging.LogRecord(python_logger_name,
-                                   logging.INFO, None, None,
-                                   message, None, None)
-
-        worker._start()
-        while not worker.started:
-            time.sleep(1)  # pragma: NO COVER
-        worker._stop_timeout = None
-        worker._stop()
-        worker.enqueue(record, message)
-        self.assertFalse(worker.batch.commit_called)
-        worker._stop()
+        self.assertTrue(worker.is_alive)
+        self.assertIsNotNone(worker._thread)
+        self.assertTrue(worker._thread.daemon)
+        self.assertEqual(worker._thread._target, worker._thread_main)
+        self.assertEqual(
+            worker._thread._name, background_thread._WORKER_THREAD_NAME)
+        atexit_mock.assert_called_once_with(worker._main_thread_terminated)
 
-    def test_run_enqueue_early(self):
-        # No-op
-        NAME = 'python_logger'
-        logger = _Logger(NAME)
-        worker = self._make_one(logger)
+        # Calling start again should not start a new thread.
+        current_thread = worker._thread
+        self._start_with_thread_patch(worker)
+        self.assertIs(current_thread, worker._thread)
 
-        python_logger_name = 'mylogger'
-        message = 'hello world'
-        record = logging.LogRecord(python_logger_name,
-                                   logging.INFO, None, None,
-                                   message, None, None)
+    def test_stop(self):
+        from google.cloud.logging.handlers.transports import background_thread
+
+        grace_period = 5.0
+        worker = self._make_one(_Logger(self.NAME))
+
+        self._start_with_thread_patch(worker)
+        thread = worker._thread
+
+        worker.stop(grace_period)
+
+        self.assertEqual(worker._queue.qsize(), 1)
+        self.assertEqual(
+            worker._queue.get(), background_thread._WORKER_TERMINATOR)
+        self.assertFalse(worker.is_alive)
+        self.assertIsNone(worker._thread)
+        self.assertEqual(thread._timeout, grace_period)
+
+        # Stopping twice should not be an error
+        worker.stop()
+
+    def test_stop_no_grace(self):
+        worker = self._make_one(_Logger(self.NAME))
+
+        self._start_with_thread_patch(worker)
+        thread = worker._thread
+
+        worker.stop()
+
+        self.assertEqual(thread._timeout, None)
+
+    def test__main_thread_terminated(self):
+        worker = self._make_one(_Logger(self.NAME))
+
+        self._start_with_thread_patch(worker)
+        worker._main_thread_terminated()
+
+        self.assertFalse(worker.is_alive)
+
+        # Calling twice should not be an error
+        worker._main_thread_terminated()
+
+    def test__main_thread_terminated_non_empty_queue(self):
+        worker = self._make_one(_Logger(self.NAME))
 
+        self._start_with_thread_patch(worker)
+        worker.enqueue(mock.Mock(), '')
+        worker._main_thread_terminated()
+
+        self.assertFalse(worker.is_alive)
+
+    def test__main_thread_terminated_did_not_join(self):
+        worker = self._make_one(_Logger(self.NAME))
+
+        self._start_with_thread_patch(worker)
+        worker._thread._terminate_on_join = False
+        worker.enqueue(mock.Mock(), '')
+        worker._main_thread_terminated()
+
+        self.assertFalse(worker.is_alive)
+
+    @staticmethod
+    def _enqueue_record(worker, message):
+        record = logging.LogRecord(
+            'python_logger', logging.INFO,
+            None, None, message, None, None)
         worker.enqueue(record, message)
-        worker._start()
-        while not worker.started:
-            time.sleep(1)  # pragma: NO COVER
-        worker._stop_timeout = None
-        worker._stop()
-        self.assertTrue(worker.stopped)
+
+    def test__thread_main(self):
+        from google.cloud.logging.handlers.transports import background_thread
+
+        worker = self._make_one(_Logger(self.NAME))
+
+        # Enqueue two records and the termination signal.
+        self._enqueue_record(worker, '1')
+        self._enqueue_record(worker, '2')
+        worker._queue.put_nowait(background_thread._WORKER_TERMINATOR)
+
+        worker._thread_main()
+
+        self.assertTrue(worker._cloud_logger._batch.commit_called)
+        self.assertEqual(worker._cloud_logger._batch.commit_count, 2)
+        self.assertEqual(worker._queue.qsize(), 0)
+
+    def test__thread_main_error(self):
+        from google.cloud.logging.handlers.transports import background_thread
+
+        worker = self._make_one(_Logger(self.NAME))
+        worker._cloud_logger._batch_cls = _RaisingBatch
+
+        # Enqueue one record and the termination signal.
+        self._enqueue_record(worker, '1')
+        worker._queue.put_nowait(background_thread._WORKER_TERMINATOR)
+
+        worker._thread_main()
+
+        self.assertTrue(worker._cloud_logger._batch.commit_called)
+        self.assertEqual(worker._queue.qsize(), 0)
+
+    def test__thread_main_batches(self):
+        from google.cloud.logging.handlers.transports import background_thread
+
+        worker = self._make_one(_Logger(self.NAME), max_batch_size=2)
+
+        # Enqueue three records and the termination signal. This should be
+        # enough to perform two separate batches and a third loop with just
+        # the exit.
+        self._enqueue_record(worker, '1')
+        self._enqueue_record(worker, '2')
+        self._enqueue_record(worker, '3')
+        self._enqueue_record(worker, '4')
+        worker._queue.put_nowait(background_thread._WORKER_TERMINATOR)
+
+        worker._thread_main()
+
+        # The last batch should not have been executed because it had no items.
+        self.assertFalse(worker._cloud_logger._batch.commit_called)
+        self.assertEqual(worker._queue.qsize(), 0)
+
+
+class _Thread(object):
+
+    def __init__(self, target, name):
+        self._target = target
+        self._name = name
+        self._timeout = None
+        self._terminate_on_join = True
+        self.daemon = False
+
+    def is_alive(self):
+        return self._is_alive
+
+    def start(self):
+        self._is_alive = True
+
+    def stop(self):
+        self._is_alive = False
+
+    def join(self, timeout=None):
+        self._timeout = timeout
+        if self._terminate_on_join:
+            self.stop()
 
 
 class _Batch(object):
@@ -149,23 +263,33 @@ class _Batch(object):
     def __init__(self):
         self.entries = []
         self.commit_called = False
+        self.commit_count = None
 
-    def log_struct(self, record, severity=logging.INFO):
-        self.log_struct_called_with = (record, severity)
-        self.entries.append(record)
+    def log_struct(self, info, severity=logging.INFO):
+        self.log_struct_called_with = (info, severity)
+        self.entries.append(info)
 
     def commit(self):
         self.commit_called = True
+        self.commit_count = len(self.entries)
         del self.entries[:]
 
 
+class _RaisingBatch(_Batch):
+    def commit(self):
+        self.commit_called = True
+        raise ValueError('This batch raises on commit.')
+
+
 class _Logger(object):
 
     def __init__(self, name):
         self.name = name
+        self._batch_cls = _Batch
+        self._batch = None
 
     def batch(self):
-        self._batch = _Batch()
+        self._batch = self._batch_cls()
         return self._batch