mirror of
https://github.com/home-assistant/core.git
synced 2025-07-19 11:17:21 +00:00
Move worker pool monitoring to be time based instead of add_job based. (#3439)
* Move worker pool monitoring to be time based instead of add_job based. * Stub out worker pool monitor during tests * Add test for monitor worker pool. * Improve naming * Test stop_monitor coroutine * Add async_create_timer test * Finish rename create_timer
This commit is contained in:
parent
d31f6bc3f0
commit
be68fe0d85
@ -56,6 +56,9 @@ MIN_WORKER_THREAD = 2
|
|||||||
# Pattern for validating entity IDs (format: <domain>.<entity>)
|
# Pattern for validating entity IDs (format: <domain>.<entity>)
|
||||||
ENTITY_ID_PATTERN = re.compile(r"^(\w+)\.(\w+)$")
|
ENTITY_ID_PATTERN = re.compile(r"^(\w+)\.(\w+)$")
|
||||||
|
|
||||||
|
# Interval at which we check if the pool is getting busy
|
||||||
|
MONITOR_POOL_INTERVAL = 30
|
||||||
|
|
||||||
_LOGGER = logging.getLogger(__name__)
|
_LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -190,7 +193,8 @@ class HomeAssistant(object):
|
|||||||
|
|
||||||
This method is a coroutine.
|
This method is a coroutine.
|
||||||
"""
|
"""
|
||||||
create_timer(self)
|
async_create_timer(self)
|
||||||
|
async_monitor_worker_pool(self)
|
||||||
self.bus.async_fire(EVENT_HOMEASSISTANT_START)
|
self.bus.async_fire(EVENT_HOMEASSISTANT_START)
|
||||||
yield from self.loop.run_in_executor(None, self.pool.block_till_done)
|
yield from self.loop.run_in_executor(None, self.pool.block_till_done)
|
||||||
self.state = CoreState.running
|
self.state = CoreState.running
|
||||||
@ -1075,13 +1079,8 @@ class Config(object):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def create_timer(hass, interval=TIMER_INTERVAL):
|
def async_create_timer(hass, interval=TIMER_INTERVAL):
|
||||||
"""Create a timer that will start on HOMEASSISTANT_START."""
|
"""Create a timer that will start on HOMEASSISTANT_START."""
|
||||||
# We want to be able to fire every time a minute starts (seconds=0).
|
|
||||||
# We want this so other modules can use that to make sure they fire
|
|
||||||
# every minute.
|
|
||||||
assert 60 % interval == 0, "60 % TIMER_INTERVAL should be 0!"
|
|
||||||
|
|
||||||
stop_event = asyncio.Event(loop=hass.loop)
|
stop_event = asyncio.Event(loop=hass.loop)
|
||||||
|
|
||||||
# Setting the Event inside the loop by marking it as a coroutine
|
# Setting the Event inside the loop by marking it as a coroutine
|
||||||
@ -1160,14 +1159,48 @@ def create_worker_pool(worker_count=None):
|
|||||||
# We do not want to crash our ThreadPool
|
# We do not want to crash our ThreadPool
|
||||||
_LOGGER.exception("BusHandler:Exception doing job")
|
_LOGGER.exception("BusHandler:Exception doing job")
|
||||||
|
|
||||||
def busy_callback(worker_count, current_jobs, pending_jobs_count):
|
return util.ThreadPool(job_handler, worker_count)
|
||||||
"""Callback to be called when the pool queue gets too big."""
|
|
||||||
|
|
||||||
|
def async_monitor_worker_pool(hass):
|
||||||
|
"""Create a monitor for the thread pool to check if pool is misbehaving."""
|
||||||
|
busy_threshold = hass.pool.worker_count * 3
|
||||||
|
|
||||||
|
handle = None
|
||||||
|
|
||||||
|
def schedule():
|
||||||
|
"""Schedule the monitor."""
|
||||||
|
nonlocal handle
|
||||||
|
handle = hass.loop.call_later(MONITOR_POOL_INTERVAL,
|
||||||
|
check_pool_threshold)
|
||||||
|
|
||||||
|
def check_pool_threshold():
|
||||||
|
"""Check pool size."""
|
||||||
|
nonlocal busy_threshold
|
||||||
|
|
||||||
|
pending_jobs = hass.pool.queue_size
|
||||||
|
|
||||||
|
if pending_jobs < busy_threshold:
|
||||||
|
schedule()
|
||||||
|
return
|
||||||
|
|
||||||
_LOGGER.warning(
|
_LOGGER.warning(
|
||||||
"WorkerPool:All %d threads are busy and %d jobs pending",
|
"WorkerPool:All %d threads are busy and %d jobs pending",
|
||||||
worker_count, pending_jobs_count)
|
hass.pool.worker_count, pending_jobs)
|
||||||
|
|
||||||
for start, job in current_jobs:
|
for start, job in hass.pool.current_jobs:
|
||||||
_LOGGER.warning("WorkerPool:Current job from %s: %s",
|
_LOGGER.warning("WorkerPool:Current job started at %s: %s",
|
||||||
dt_util.as_local(start).isoformat(), job)
|
dt_util.as_local(start).isoformat(), job)
|
||||||
|
|
||||||
return util.ThreadPool(job_handler, worker_count, busy_callback)
|
busy_threshold *= 2
|
||||||
|
|
||||||
|
schedule()
|
||||||
|
|
||||||
|
schedule()
|
||||||
|
|
||||||
|
@asyncio.coroutine
|
||||||
|
def stop_monitor(event):
|
||||||
|
"""Stop the monitor."""
|
||||||
|
handle.cancel()
|
||||||
|
|
||||||
|
hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STOP, stop_monitor)
|
||||||
|
@ -143,7 +143,7 @@ class HomeAssistant(ha.HomeAssistant):
|
|||||||
'Unable to setup local API to receive events')
|
'Unable to setup local API to receive events')
|
||||||
|
|
||||||
self.state = ha.CoreState.starting
|
self.state = ha.CoreState.starting
|
||||||
ha.create_timer(self)
|
ha.async_create_timer(self)
|
||||||
|
|
||||||
self.bus.fire(ha.EVENT_HOMEASSISTANT_START,
|
self.bus.fire(ha.EVENT_HOMEASSISTANT_START,
|
||||||
origin=ha.EventOrigin.remote)
|
origin=ha.EventOrigin.remote)
|
||||||
|
@ -308,7 +308,7 @@ class ThreadPool(object):
|
|||||||
"""A priority queue-based thread pool."""
|
"""A priority queue-based thread pool."""
|
||||||
|
|
||||||
# pylint: disable=too-many-instance-attributes
|
# pylint: disable=too-many-instance-attributes
|
||||||
def __init__(self, job_handler, worker_count=0, busy_callback=None):
|
def __init__(self, job_handler, worker_count=0):
|
||||||
"""Initialize the pool.
|
"""Initialize the pool.
|
||||||
|
|
||||||
job_handler: method to be called from worker thread to handle job
|
job_handler: method to be called from worker thread to handle job
|
||||||
@ -318,13 +318,10 @@ class ThreadPool(object):
|
|||||||
pending_jobs_count
|
pending_jobs_count
|
||||||
"""
|
"""
|
||||||
self._job_handler = job_handler
|
self._job_handler = job_handler
|
||||||
self._busy_callback = busy_callback
|
|
||||||
|
|
||||||
self.worker_count = 0
|
self.worker_count = 0
|
||||||
self.busy_warning_limit = 0
|
|
||||||
self._work_queue = queue.PriorityQueue()
|
self._work_queue = queue.PriorityQueue()
|
||||||
self.current_jobs = []
|
self.current_jobs = []
|
||||||
self._lock = threading.RLock()
|
|
||||||
self._quit_task = object()
|
self._quit_task = object()
|
||||||
|
|
||||||
self.running = True
|
self.running = True
|
||||||
@ -332,70 +329,45 @@ class ThreadPool(object):
|
|||||||
for _ in range(worker_count):
|
for _ in range(worker_count):
|
||||||
self.add_worker()
|
self.add_worker()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def queue_size(self):
|
||||||
|
"""Return estimated number of jobs that are waiting to be processed."""
|
||||||
|
return self._work_queue.qsize()
|
||||||
|
|
||||||
def add_worker(self):
|
def add_worker(self):
|
||||||
"""Add worker to the thread pool and reset warning limit."""
|
"""Add worker to the thread pool and reset warning limit."""
|
||||||
with self._lock:
|
if not self.running:
|
||||||
if not self.running:
|
raise RuntimeError("ThreadPool not running")
|
||||||
raise RuntimeError("ThreadPool not running")
|
|
||||||
|
|
||||||
worker = threading.Thread(
|
threading.Thread(
|
||||||
target=self._worker,
|
target=self._worker, daemon=True,
|
||||||
name='ThreadPool Worker {}'.format(self.worker_count))
|
name='ThreadPool Worker {}'.format(self.worker_count)).start()
|
||||||
worker.daemon = True
|
|
||||||
worker.start()
|
|
||||||
|
|
||||||
self.worker_count += 1
|
self.worker_count += 1
|
||||||
self.busy_warning_limit = self.worker_count * 3
|
|
||||||
|
|
||||||
def remove_worker(self):
|
def remove_worker(self):
|
||||||
"""Remove worker from the thread pool and reset warning limit."""
|
"""Remove worker from the thread pool and reset warning limit."""
|
||||||
with self._lock:
|
if not self.running:
|
||||||
if not self.running:
|
raise RuntimeError("ThreadPool not running")
|
||||||
raise RuntimeError("ThreadPool not running")
|
|
||||||
|
|
||||||
self._work_queue.put(PriorityQueueItem(0, self._quit_task))
|
self._work_queue.put(PriorityQueueItem(0, self._quit_task))
|
||||||
|
|
||||||
self.worker_count -= 1
|
self.worker_count -= 1
|
||||||
self.busy_warning_limit = self.worker_count * 3
|
|
||||||
|
|
||||||
def add_job(self, priority, job):
|
def add_job(self, priority, job):
|
||||||
"""Add a job to the queue."""
|
"""Add a job to the queue."""
|
||||||
with self._lock:
|
if not self.running:
|
||||||
if not self.running:
|
raise RuntimeError("ThreadPool not running")
|
||||||
raise RuntimeError("ThreadPool not running")
|
|
||||||
|
|
||||||
self._work_queue.put(PriorityQueueItem(priority, job))
|
self._work_queue.put(PriorityQueueItem(priority, job))
|
||||||
|
|
||||||
# Check if our queue is getting too big.
|
|
||||||
if self._work_queue.qsize() > self.busy_warning_limit \
|
|
||||||
and self._busy_callback is not None:
|
|
||||||
|
|
||||||
# Increase limit we will issue next warning.
|
|
||||||
self.busy_warning_limit *= 2
|
|
||||||
|
|
||||||
self._busy_callback(
|
|
||||||
self.worker_count, self.current_jobs,
|
|
||||||
self._work_queue.qsize())
|
|
||||||
|
|
||||||
def add_many_jobs(self, jobs):
|
def add_many_jobs(self, jobs):
|
||||||
"""Add a list of jobs to the queue."""
|
"""Add a list of jobs to the queue."""
|
||||||
with self._lock:
|
if not self.running:
|
||||||
if not self.running:
|
raise RuntimeError("ThreadPool not running")
|
||||||
raise RuntimeError("ThreadPool not running")
|
|
||||||
|
|
||||||
for priority, job in jobs:
|
for priority, job in jobs:
|
||||||
self._work_queue.put(PriorityQueueItem(priority, job))
|
self._work_queue.put(PriorityQueueItem(priority, job))
|
||||||
|
|
||||||
# Check if our queue is getting too big.
|
|
||||||
if self._work_queue.qsize() > self.busy_warning_limit \
|
|
||||||
and self._busy_callback is not None:
|
|
||||||
|
|
||||||
# Increase limit we will issue next warning.
|
|
||||||
self.busy_warning_limit *= 2
|
|
||||||
|
|
||||||
self._busy_callback(
|
|
||||||
self.worker_count, self.current_jobs,
|
|
||||||
self._work_queue.qsize())
|
|
||||||
|
|
||||||
def block_till_done(self):
|
def block_till_done(self):
|
||||||
"""Block till current work is done."""
|
"""Block till current work is done."""
|
||||||
@ -405,18 +377,17 @@ class ThreadPool(object):
|
|||||||
"""Finish all the jobs and stops all the threads."""
|
"""Finish all the jobs and stops all the threads."""
|
||||||
self.block_till_done()
|
self.block_till_done()
|
||||||
|
|
||||||
with self._lock:
|
if not self.running:
|
||||||
if not self.running:
|
return
|
||||||
return
|
|
||||||
|
|
||||||
# Tell the workers to quit
|
# Tell the workers to quit
|
||||||
for _ in range(self.worker_count):
|
for _ in range(self.worker_count):
|
||||||
self.remove_worker()
|
self.remove_worker()
|
||||||
|
|
||||||
self.running = False
|
self.running = False
|
||||||
|
|
||||||
# Wait till all workers have quit
|
# Wait till all workers have quit
|
||||||
self.block_till_done()
|
self.block_till_done()
|
||||||
|
|
||||||
def _worker(self):
|
def _worker(self):
|
||||||
"""Handle jobs for the thread pool."""
|
"""Handle jobs for the thread pool."""
|
||||||
|
@ -2,6 +2,7 @@ flake8>=3.0.4
|
|||||||
pylint>=1.5.6
|
pylint>=1.5.6
|
||||||
coveralls>=1.1
|
coveralls>=1.1
|
||||||
pytest>=2.9.2
|
pytest>=2.9.2
|
||||||
|
pytest-asyncio>=0.5.0
|
||||||
pytest-cov>=2.3.1
|
pytest-cov>=2.3.1
|
||||||
pytest-timeout>=1.0.0
|
pytest-timeout>=1.0.0
|
||||||
pytest-catchlog>=1.2.2
|
pytest-catchlog>=1.2.2
|
||||||
|
@ -75,9 +75,11 @@ def get_test_home_assistant(num_threads=None):
|
|||||||
"""Helper to start hass."""
|
"""Helper to start hass."""
|
||||||
with patch.object(hass.loop, 'run_forever', return_value=None):
|
with patch.object(hass.loop, 'run_forever', return_value=None):
|
||||||
with patch.object(hass, 'async_stop', return_value=fake_stop()):
|
with patch.object(hass, 'async_stop', return_value=fake_stop()):
|
||||||
with patch.object(ha, 'create_timer', return_value=None):
|
with patch.object(ha, 'async_create_timer', return_value=None):
|
||||||
orig_start()
|
with patch.object(ha, 'async_monitor_worker_pool',
|
||||||
hass.block_till_done()
|
return_value=None):
|
||||||
|
orig_start()
|
||||||
|
hass.block_till_done()
|
||||||
|
|
||||||
def stop_hass():
|
def stop_hass():
|
||||||
orig_stop()
|
orig_stop()
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch, MagicMock
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
@ -459,3 +459,84 @@ class TestWorkerPool(unittest.TestCase):
|
|||||||
pool.add_job(ha.JobPriority.EVENT_DEFAULT, (register_call, None))
|
pool.add_job(ha.JobPriority.EVENT_DEFAULT, (register_call, None))
|
||||||
pool.block_till_done()
|
pool.block_till_done()
|
||||||
self.assertEqual(1, len(calls))
|
self.assertEqual(1, len(calls))
|
||||||
|
|
||||||
|
|
||||||
|
class TestWorkerPoolMonitor(object):
|
||||||
|
"""Test monitor_worker_pool."""
|
||||||
|
|
||||||
|
@patch('homeassistant.core._LOGGER.warning')
|
||||||
|
def test_worker_pool_monitor(self, mock_warning, event_loop):
|
||||||
|
"""Test we log an error and increase threshold."""
|
||||||
|
hass = MagicMock()
|
||||||
|
hass.pool.worker_count = 3
|
||||||
|
schedule_handle = MagicMock()
|
||||||
|
hass.loop.call_later.return_value = schedule_handle
|
||||||
|
|
||||||
|
ha.async_monitor_worker_pool(hass)
|
||||||
|
assert hass.loop.call_later.called
|
||||||
|
assert hass.bus.async_listen_once.called
|
||||||
|
assert not schedule_handle.called
|
||||||
|
|
||||||
|
check_threshold = hass.loop.call_later.mock_calls[0][1][1]
|
||||||
|
|
||||||
|
hass.pool.queue_size = 8
|
||||||
|
check_threshold()
|
||||||
|
assert not mock_warning.called
|
||||||
|
|
||||||
|
hass.pool.queue_size = 9
|
||||||
|
check_threshold()
|
||||||
|
assert mock_warning.called
|
||||||
|
|
||||||
|
mock_warning.reset_mock()
|
||||||
|
assert not mock_warning.called
|
||||||
|
|
||||||
|
check_threshold()
|
||||||
|
assert not mock_warning.called
|
||||||
|
|
||||||
|
hass.pool.queue_size = 17
|
||||||
|
check_threshold()
|
||||||
|
assert not mock_warning.called
|
||||||
|
|
||||||
|
hass.pool.queue_size = 18
|
||||||
|
check_threshold()
|
||||||
|
assert mock_warning.called
|
||||||
|
|
||||||
|
event_loop.run_until_complete(
|
||||||
|
hass.bus.async_listen_once.mock_calls[0][1][1](None))
|
||||||
|
assert schedule_handle.cancel.called
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsyncCreateTimer(object):
|
||||||
|
"""Test create timer."""
|
||||||
|
|
||||||
|
@patch('homeassistant.core.asyncio.Event')
|
||||||
|
@patch('homeassistant.core.dt_util.utcnow')
|
||||||
|
def test_create_timer(self, mock_utcnow, mock_event, event_loop):
|
||||||
|
"""Test create timer fires correctly."""
|
||||||
|
hass = MagicMock()
|
||||||
|
now = mock_utcnow()
|
||||||
|
event = mock_event()
|
||||||
|
now.second = 1
|
||||||
|
mock_utcnow.reset_mock()
|
||||||
|
|
||||||
|
ha.async_create_timer(hass)
|
||||||
|
assert len(hass.bus.async_listen_once.mock_calls) == 2
|
||||||
|
start_timer = hass.bus.async_listen_once.mock_calls[1][1][1]
|
||||||
|
|
||||||
|
event_loop.run_until_complete(start_timer(None))
|
||||||
|
assert hass.loop.create_task.called
|
||||||
|
|
||||||
|
timer = hass.loop.create_task.mock_calls[0][1][0]
|
||||||
|
event.is_set.side_effect = False, False, True
|
||||||
|
event_loop.run_until_complete(timer)
|
||||||
|
assert len(mock_utcnow.mock_calls) == 1
|
||||||
|
|
||||||
|
assert hass.loop.call_soon.called
|
||||||
|
event_type, event_data = hass.loop.call_soon.mock_calls[0][1][1:]
|
||||||
|
|
||||||
|
assert ha.EVENT_TIME_CHANGED == event_type
|
||||||
|
assert {ha.ATTR_NOW: now} == event_data
|
||||||
|
|
||||||
|
stop_timer = hass.bus.async_listen_once.mock_calls[0][1][1]
|
||||||
|
event_loop.run_until_complete(stop_timer(None))
|
||||||
|
assert event.set.called
|
||||||
|
@ -69,7 +69,7 @@ def setUpModule(): # pylint: disable=invalid-name
|
|||||||
{http.DOMAIN: {http.CONF_API_PASSWORD: API_PASSWORD,
|
{http.DOMAIN: {http.CONF_API_PASSWORD: API_PASSWORD,
|
||||||
http.CONF_SERVER_PORT: SLAVE_PORT}})
|
http.CONF_SERVER_PORT: SLAVE_PORT}})
|
||||||
|
|
||||||
with patch.object(ha, 'create_timer', return_value=None):
|
with patch.object(ha, 'async_create_timer', return_value=None):
|
||||||
slave.start()
|
slave.start()
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user