Move worker pool monitoring to be time based instead of add_job based. (#3439)

* Move worker pool monitoring to be time based instead of add_job based.

* Stub out worker pool monitor during tests

* Add test for monitor worker pool.

* Improve naming

* Test stop_monitor coroutine

* Add async_create_timer test

* Finish renaming create_timer to async_create_timer
This commit is contained in:
Paulus Schoutsen 2016-09-19 23:39:49 -07:00 committed by GitHub
parent d31f6bc3f0
commit be68fe0d85
7 changed files with 167 additions and 79 deletions

View File

@ -56,6 +56,9 @@ MIN_WORKER_THREAD = 2
# Pattern for validating entity IDs (format: <domain>.<entity>) # Pattern for validating entity IDs (format: <domain>.<entity>)
ENTITY_ID_PATTERN = re.compile(r"^(\w+)\.(\w+)$") ENTITY_ID_PATTERN = re.compile(r"^(\w+)\.(\w+)$")
# Interval at which we check if the pool is getting busy
MONITOR_POOL_INTERVAL = 30
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
@ -190,7 +193,8 @@ class HomeAssistant(object):
This method is a coroutine. This method is a coroutine.
""" """
create_timer(self) async_create_timer(self)
async_monitor_worker_pool(self)
self.bus.async_fire(EVENT_HOMEASSISTANT_START) self.bus.async_fire(EVENT_HOMEASSISTANT_START)
yield from self.loop.run_in_executor(None, self.pool.block_till_done) yield from self.loop.run_in_executor(None, self.pool.block_till_done)
self.state = CoreState.running self.state = CoreState.running
@ -1075,13 +1079,8 @@ class Config(object):
} }
def create_timer(hass, interval=TIMER_INTERVAL): def async_create_timer(hass, interval=TIMER_INTERVAL):
"""Create a timer that will start on HOMEASSISTANT_START.""" """Create a timer that will start on HOMEASSISTANT_START."""
# We want to be able to fire every time a minute starts (seconds=0).
# We want this so other modules can use that to make sure they fire
# every minute.
assert 60 % interval == 0, "60 % TIMER_INTERVAL should be 0!"
stop_event = asyncio.Event(loop=hass.loop) stop_event = asyncio.Event(loop=hass.loop)
# Setting the Event inside the loop by marking it as a coroutine # Setting the Event inside the loop by marking it as a coroutine
@ -1160,14 +1159,48 @@ def create_worker_pool(worker_count=None):
# We do not want to crash our ThreadPool # We do not want to crash our ThreadPool
_LOGGER.exception("BusHandler:Exception doing job") _LOGGER.exception("BusHandler:Exception doing job")
def busy_callback(worker_count, current_jobs, pending_jobs_count): return util.ThreadPool(job_handler, worker_count)
"""Callback to be called when the pool queue gets too big."""
def async_monitor_worker_pool(hass):
"""Create a monitor for the thread pool to check if pool is misbehaving."""
busy_threshold = hass.pool.worker_count * 3
handle = None
def schedule():
"""Schedule the monitor."""
nonlocal handle
handle = hass.loop.call_later(MONITOR_POOL_INTERVAL,
check_pool_threshold)
def check_pool_threshold():
"""Check pool size."""
nonlocal busy_threshold
pending_jobs = hass.pool.queue_size
if pending_jobs < busy_threshold:
schedule()
return
_LOGGER.warning( _LOGGER.warning(
"WorkerPool:All %d threads are busy and %d jobs pending", "WorkerPool:All %d threads are busy and %d jobs pending",
worker_count, pending_jobs_count) hass.pool.worker_count, pending_jobs)
for start, job in current_jobs: for start, job in hass.pool.current_jobs:
_LOGGER.warning("WorkerPool:Current job from %s: %s", _LOGGER.warning("WorkerPool:Current job started at %s: %s",
dt_util.as_local(start).isoformat(), job) dt_util.as_local(start).isoformat(), job)
return util.ThreadPool(job_handler, worker_count, busy_callback) busy_threshold *= 2
schedule()
schedule()
@asyncio.coroutine
def stop_monitor(event):
"""Stop the monitor."""
handle.cancel()
hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STOP, stop_monitor)

View File

@ -143,7 +143,7 @@ class HomeAssistant(ha.HomeAssistant):
'Unable to setup local API to receive events') 'Unable to setup local API to receive events')
self.state = ha.CoreState.starting self.state = ha.CoreState.starting
ha.create_timer(self) ha.async_create_timer(self)
self.bus.fire(ha.EVENT_HOMEASSISTANT_START, self.bus.fire(ha.EVENT_HOMEASSISTANT_START,
origin=ha.EventOrigin.remote) origin=ha.EventOrigin.remote)

View File

@ -308,7 +308,7 @@ class ThreadPool(object):
"""A priority queue-based thread pool.""" """A priority queue-based thread pool."""
# pylint: disable=too-many-instance-attributes # pylint: disable=too-many-instance-attributes
def __init__(self, job_handler, worker_count=0, busy_callback=None): def __init__(self, job_handler, worker_count=0):
"""Initialize the pool. """Initialize the pool.
job_handler: method to be called from worker thread to handle job job_handler: method to be called from worker thread to handle job
@ -318,13 +318,10 @@ class ThreadPool(object):
pending_jobs_count pending_jobs_count
""" """
self._job_handler = job_handler self._job_handler = job_handler
self._busy_callback = busy_callback
self.worker_count = 0 self.worker_count = 0
self.busy_warning_limit = 0
self._work_queue = queue.PriorityQueue() self._work_queue = queue.PriorityQueue()
self.current_jobs = [] self.current_jobs = []
self._lock = threading.RLock()
self._quit_task = object() self._quit_task = object()
self.running = True self.running = True
@ -332,71 +329,46 @@ class ThreadPool(object):
for _ in range(worker_count): for _ in range(worker_count):
self.add_worker() self.add_worker()
@property
def queue_size(self):
"""Return estimated number of jobs that are waiting to be processed."""
return self._work_queue.qsize()
def add_worker(self): def add_worker(self):
"""Add worker to the thread pool and reset warning limit.""" """Add worker to the thread pool and reset warning limit."""
with self._lock:
if not self.running: if not self.running:
raise RuntimeError("ThreadPool not running") raise RuntimeError("ThreadPool not running")
worker = threading.Thread( threading.Thread(
target=self._worker, target=self._worker, daemon=True,
name='ThreadPool Worker {}'.format(self.worker_count)) name='ThreadPool Worker {}'.format(self.worker_count)).start()
worker.daemon = True
worker.start()
self.worker_count += 1 self.worker_count += 1
self.busy_warning_limit = self.worker_count * 3
def remove_worker(self): def remove_worker(self):
"""Remove worker from the thread pool and reset warning limit.""" """Remove worker from the thread pool and reset warning limit."""
with self._lock:
if not self.running: if not self.running:
raise RuntimeError("ThreadPool not running") raise RuntimeError("ThreadPool not running")
self._work_queue.put(PriorityQueueItem(0, self._quit_task)) self._work_queue.put(PriorityQueueItem(0, self._quit_task))
self.worker_count -= 1 self.worker_count -= 1
self.busy_warning_limit = self.worker_count * 3
def add_job(self, priority, job): def add_job(self, priority, job):
"""Add a job to the queue.""" """Add a job to the queue."""
with self._lock:
if not self.running: if not self.running:
raise RuntimeError("ThreadPool not running") raise RuntimeError("ThreadPool not running")
self._work_queue.put(PriorityQueueItem(priority, job)) self._work_queue.put(PriorityQueueItem(priority, job))
# Check if our queue is getting too big.
if self._work_queue.qsize() > self.busy_warning_limit \
and self._busy_callback is not None:
# Increase limit we will issue next warning.
self.busy_warning_limit *= 2
self._busy_callback(
self.worker_count, self.current_jobs,
self._work_queue.qsize())
def add_many_jobs(self, jobs): def add_many_jobs(self, jobs):
"""Add a list of jobs to the queue.""" """Add a list of jobs to the queue."""
with self._lock:
if not self.running: if not self.running:
raise RuntimeError("ThreadPool not running") raise RuntimeError("ThreadPool not running")
for priority, job in jobs: for priority, job in jobs:
self._work_queue.put(PriorityQueueItem(priority, job)) self._work_queue.put(PriorityQueueItem(priority, job))
# Check if our queue is getting too big.
if self._work_queue.qsize() > self.busy_warning_limit \
and self._busy_callback is not None:
# Increase limit we will issue next warning.
self.busy_warning_limit *= 2
self._busy_callback(
self.worker_count, self.current_jobs,
self._work_queue.qsize())
def block_till_done(self): def block_till_done(self):
"""Block till current work is done.""" """Block till current work is done."""
self._work_queue.join() self._work_queue.join()
@ -405,7 +377,6 @@ class ThreadPool(object):
"""Finish all the jobs and stops all the threads.""" """Finish all the jobs and stops all the threads."""
self.block_till_done() self.block_till_done()
with self._lock:
if not self.running: if not self.running:
return return

View File

@ -2,6 +2,7 @@ flake8>=3.0.4
pylint>=1.5.6 pylint>=1.5.6
coveralls>=1.1 coveralls>=1.1
pytest>=2.9.2 pytest>=2.9.2
pytest-asyncio>=0.5.0
pytest-cov>=2.3.1 pytest-cov>=2.3.1
pytest-timeout>=1.0.0 pytest-timeout>=1.0.0
pytest-catchlog>=1.2.2 pytest-catchlog>=1.2.2

View File

@ -75,7 +75,9 @@ def get_test_home_assistant(num_threads=None):
"""Helper to start hass.""" """Helper to start hass."""
with patch.object(hass.loop, 'run_forever', return_value=None): with patch.object(hass.loop, 'run_forever', return_value=None):
with patch.object(hass, 'async_stop', return_value=fake_stop()): with patch.object(hass, 'async_stop', return_value=fake_stop()):
with patch.object(ha, 'create_timer', return_value=None): with patch.object(ha, 'async_create_timer', return_value=None):
with patch.object(ha, 'async_monitor_worker_pool',
return_value=None):
orig_start() orig_start()
hass.block_till_done() hass.block_till_done()

View File

@ -4,7 +4,7 @@
import os import os
import signal import signal
import unittest import unittest
from unittest.mock import patch from unittest.mock import patch, MagicMock
from datetime import datetime, timedelta from datetime import datetime, timedelta
import pytz import pytz
@ -459,3 +459,84 @@ class TestWorkerPool(unittest.TestCase):
pool.add_job(ha.JobPriority.EVENT_DEFAULT, (register_call, None)) pool.add_job(ha.JobPriority.EVENT_DEFAULT, (register_call, None))
pool.block_till_done() pool.block_till_done()
self.assertEqual(1, len(calls)) self.assertEqual(1, len(calls))
class TestWorkerPoolMonitor(object):
"""Test async_monitor_worker_pool."""
@patch('homeassistant.core._LOGGER.warning')
def test_worker_pool_monitor(self, mock_warning, event_loop):
"""Test we log a warning and double the threshold."""
hass = MagicMock()
hass.pool.worker_count = 3
schedule_handle = MagicMock()
hass.loop.call_later.return_value = schedule_handle
ha.async_monitor_worker_pool(hass)
assert hass.loop.call_later.called
assert hass.bus.async_listen_once.called
assert not schedule_handle.called
check_threshold = hass.loop.call_later.mock_calls[0][1][1]
hass.pool.queue_size = 8
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 9
check_threshold()
assert mock_warning.called
mock_warning.reset_mock()
assert not mock_warning.called
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 17
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 18
check_threshold()
assert mock_warning.called
event_loop.run_until_complete(
hass.bus.async_listen_once.mock_calls[0][1][1](None))
assert schedule_handle.cancel.called
class TestAsyncCreateTimer(object):
"""Test create timer."""
@patch('homeassistant.core.asyncio.Event')
@patch('homeassistant.core.dt_util.utcnow')
def test_create_timer(self, mock_utcnow, mock_event, event_loop):
"""Test create timer fires correctly."""
hass = MagicMock()
now = mock_utcnow()
event = mock_event()
now.second = 1
mock_utcnow.reset_mock()
ha.async_create_timer(hass)
assert len(hass.bus.async_listen_once.mock_calls) == 2
start_timer = hass.bus.async_listen_once.mock_calls[1][1][1]
event_loop.run_until_complete(start_timer(None))
assert hass.loop.create_task.called
timer = hass.loop.create_task.mock_calls[0][1][0]
event.is_set.side_effect = False, False, True
event_loop.run_until_complete(timer)
assert len(mock_utcnow.mock_calls) == 1
assert hass.loop.call_soon.called
event_type, event_data = hass.loop.call_soon.mock_calls[0][1][1:]
assert ha.EVENT_TIME_CHANGED == event_type
assert {ha.ATTR_NOW: now} == event_data
stop_timer = hass.bus.async_listen_once.mock_calls[0][1][1]
event_loop.run_until_complete(stop_timer(None))
assert event.set.called

View File

@ -69,7 +69,7 @@ def setUpModule(): # pylint: disable=invalid-name
{http.DOMAIN: {http.CONF_API_PASSWORD: API_PASSWORD, {http.DOMAIN: {http.CONF_API_PASSWORD: API_PASSWORD,
http.CONF_SERVER_PORT: SLAVE_PORT}}) http.CONF_SERVER_PORT: SLAVE_PORT}})
with patch.object(ha, 'create_timer', return_value=None): with patch.object(ha, 'async_create_timer', return_value=None):
slave.start() slave.start()