Reduce bottlenecks in bootstrap by ordering the setup of integrations (#113570)

This commit is contained in:
J. Nick Koston 2024-03-15 22:38:06 -10:00 committed by GitHub
parent 702488062b
commit eb90c9a548
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 236 additions and 27 deletions

View File

@ -5,9 +5,11 @@ from __future__ import annotations
import asyncio
import contextlib
from datetime import timedelta
from functools import partial
from itertools import chain
import logging
import logging.handlers
from operator import itemgetter
from operator import contains, itemgetter
import os
import platform
import sys
@ -94,6 +96,9 @@ if TYPE_CHECKING:
_LOGGER = logging.getLogger(__name__)
SETUP_ORDER_SORT_KEY = partial(contains, BASE_PLATFORMS)
ERROR_LOG_FILENAME = "home-assistant.log"
# hass.data key for logging information.
@ -664,13 +669,18 @@ async def async_setup_multi_components(
"""Set up multiple domains. Log on failure."""
# Avoid creating tasks for domains that were setup in a previous stage
domains_not_yet_setup = domains - hass.config.components
# Create setup tasks for base platforms first since everything will have
# to wait to be imported, and the sooner we can get the base platforms
# loaded the sooner we can start loading the rest of the integrations.
futures = {
domain: hass.async_create_task(
async_setup_component(hass, domain, config),
f"setup component {domain}",
eager_start=True,
)
for domain in domains_not_yet_setup
for domain in sorted(
domains_not_yet_setup, key=SETUP_ORDER_SORT_KEY, reverse=True
)
}
results = await asyncio.gather(*futures.values(), return_exceptions=True)
for idx, domain in enumerate(futures):
@ -687,29 +697,53 @@ async def _async_resolve_domains_to_setup(
hass: core.HomeAssistant, config: dict[str, Any]
) -> tuple[set[str], dict[str, loader.Integration]]:
"""Resolve all dependencies and return list of domains to set up."""
base_platforms_loaded = False
domains_to_setup = _get_domains(hass, config)
needed_requirements: set[str] = set()
platform_integrations = conf_util.extract_platform_integrations(
config, BASE_PLATFORMS
)
# Ensure base platforms that have platform integrations are added to
# to `domains_to_setup so they can be setup first instead of
# discovering them when later when a config entry setup task
# notices its needed and there is already a long line to use
# the import executor.
#
# For example if we have
# sensor:
# - platform: template
#
# `template` has to be loaded to validate the config for sensor
# so we want to start loading `sensor` as soon as we know
# it will be needed. The more platforms under `sensor:`, the longer
# it will take to finish setup for `sensor` because each of these
# platforms has to be imported before we can validate the config.
#
# Thankfully we are migrating away from the platform pattern
# so this will be less of a problem in the future.
domains_to_setup.update(platform_integrations)
# Load manifests for base platforms and platform based integrations
# that are defined under base platforms right away since we do not require
# the manifest to list them as dependencies and we want to avoid the lock
# contention when multiple integrations try to load them at once
additional_manifests_to_load = {
*BASE_PLATFORMS,
*chain.from_iterable(platform_integrations.values()),
}
translations_to_load = {*domains_to_setup, *additional_manifests_to_load}
# Resolve all dependencies so we know all integrations
# that will have to be loaded and start rightaway
# that will have to be loaded and start right-away
integration_cache: dict[str, loader.Integration] = {}
to_resolve: set[str] = domains_to_setup
while to_resolve:
while to_resolve or additional_manifests_to_load:
old_to_resolve: set[str] = to_resolve
to_resolve = set()
if not base_platforms_loaded:
# Load base platforms right away since
# we do not require the manifest to list
# them as dependencies and we want
# to avoid the lock contention when multiple
# integrations try to resolve them at once
base_platforms_loaded = True
to_get = {*old_to_resolve, *BASE_PLATFORMS, *platform_integrations}
if additional_manifests_to_load:
to_get = {*old_to_resolve, *additional_manifests_to_load}
additional_manifests_to_load.clear()
else:
to_get = old_to_resolve
@ -722,6 +756,17 @@ async def _async_resolve_domains_to_setup(
continue
integration_cache[domain] = itg
needed_requirements.update(itg.requirements)
# Make sure manifests for dependencies are loaded in the next
# loop to try to group as many as manifest loads in a single
# call to avoid the creating one-off executor jobs later in
# the setup process
additional_manifests_to_load.update(
dep
for dep in chain(itg.dependencies, itg.after_dependencies)
if dep not in integration_cache
)
if domain not in old_to_resolve:
continue
@ -781,9 +826,7 @@ async def _async_resolve_domains_to_setup(
# wait for the translation load lock, loading will be done by the
# time it gets to it.
hass.async_create_background_task(
translation.async_load_integrations(
hass, {*BASE_PLATFORMS, *platform_integrations, *domains_to_setup}
),
translation.async_load_integrations(hass, translations_to_load),
"load translations",
eager_start=True,
)

View File

@ -1391,9 +1391,14 @@ def config_per_platform(
yield platform, item
def extract_platform_integrations(config: ConfigType, domains: set[str]) -> set[str]:
"""Find all the platforms in a configuration."""
platform_integrations: set[str] = set()
def extract_platform_integrations(
config: ConfigType, domains: set[str]
) -> dict[str, set[str]]:
"""Find all the platforms in a configuration.
Returns a dictionary with domain as key and a set of platforms as value.
"""
platform_integrations: dict[str, set[str]] = {}
for key, domain_config in config.items():
try:
domain = cv.domain_key(key)
@ -1411,7 +1416,7 @@ def extract_platform_integrations(config: ConfigType, domains: set[str]) -> set[
except AttributeError:
continue
if platform:
platform_integrations.add(platform)
platform_integrations.setdefault(domain, set()).add(platform)
return platform_integrations

View File

@ -222,6 +222,93 @@ async def test_setup_after_deps_in_stage_1_ignored(hass: HomeAssistant) -> None:
assert order == ["cloud", "an_after_dep", "normal_integration"]
@pytest.mark.parametrize("load_registries", [False])
async def test_setup_after_deps_manifests_are_loaded_even_if_not_setup(
hass: HomeAssistant,
) -> None:
"""Ensure we preload manifests for after deps even if they are not setup.
Its important that we preload the after dep manifests even if they are not setup
since we will always have to check their requirements since any integration
that lists an after dep may import it and we have to ensure requirements are
up to date before the after dep can be imported.
"""
# This test relies on this
assert "cloud" in bootstrap.STAGE_1_INTEGRATIONS
order = []
def gen_domain_setup(domain):
async def async_setup(hass, config):
order.append(domain)
return True
return async_setup
mock_integration(
hass,
MockModule(
domain="normal_integration",
async_setup=gen_domain_setup("normal_integration"),
partial_manifest={"after_dependencies": ["an_after_dep"]},
),
)
mock_integration(
hass,
MockModule(
domain="an_after_dep",
async_setup=gen_domain_setup("an_after_dep"),
partial_manifest={"after_dependencies": ["an_after_dep_of_after_dep"]},
),
)
mock_integration(
hass,
MockModule(
domain="an_after_dep_of_after_dep",
async_setup=gen_domain_setup("an_after_dep_of_after_dep"),
partial_manifest={
"after_dependencies": ["an_after_dep_of_after_dep_of_after_dep"]
},
),
)
mock_integration(
hass,
MockModule(
domain="an_after_dep_of_after_dep_of_after_dep",
async_setup=gen_domain_setup("an_after_dep_of_after_dep_of_after_dep"),
),
)
mock_integration(
hass,
MockModule(
domain="cloud",
async_setup=gen_domain_setup("cloud"),
partial_manifest={"after_dependencies": ["normal_integration"]},
),
)
await bootstrap._async_set_up_integrations(
hass, {"cloud": {}, "normal_integration": {}}
)
assert "normal_integration" in hass.config.components
assert "cloud" in hass.config.components
assert "an_after_dep" not in hass.config.components
assert "an_after_dep_of_after_dep" not in hass.config.components
assert "an_after_dep_of_after_dep_of_after_dep" not in hass.config.components
assert order == ["cloud", "normal_integration"]
assert loader.async_get_loaded_integration(hass, "an_after_dep") is not None
assert (
loader.async_get_loaded_integration(hass, "an_after_dep_of_after_dep")
is not None
)
assert (
loader.async_get_loaded_integration(
hass, "an_after_dep_of_after_dep_of_after_dep"
)
is not None
)
@pytest.mark.parametrize("load_registries", [False])
async def test_setup_frontend_before_recorder(hass: HomeAssistant) -> None:
"""Test frontend is setup before recorder."""
@ -1193,3 +1280,78 @@ async def test_cancellation_does_not_leak_upward_from_async_setup_entry(
assert "test_package" in hass.config.components
assert "test_package_raises_cancelled_error_config_entry" in hass.config.components
@pytest.mark.parametrize("load_registries", [False])
async def test_setup_does_base_platforms_first(hass: HomeAssistant) -> None:
"""Test setup does base platforms first.
Its important that base platforms are setup before other integrations
in stage1/2 since they are the foundation for other integrations and
almost every integration has to wait for them to be setup.
"""
order = []
def gen_domain_setup(domain):
async def async_setup(hass, config):
order.append(domain)
return True
return async_setup
mock_integration(
hass, MockModule(domain="sensor", async_setup=gen_domain_setup("sensor"))
)
mock_integration(
hass,
MockModule(
domain="binary_sensor", async_setup=gen_domain_setup("binary_sensor")
),
)
mock_integration(
hass, MockModule(domain="root", async_setup=gen_domain_setup("root"))
)
mock_integration(
hass,
MockModule(
domain="first_dep",
async_setup=gen_domain_setup("first_dep"),
partial_manifest={"after_dependencies": ["root"]},
),
)
mock_integration(
hass,
MockModule(
domain="second_dep",
async_setup=gen_domain_setup("second_dep"),
partial_manifest={"after_dependencies": ["first_dep"]},
),
)
with patch(
"homeassistant.components.logger.async_setup", gen_domain_setup("logger")
):
await bootstrap._async_set_up_integrations(
hass,
{
"root": {},
"first_dep": {},
"second_dep": {},
"sensor": {},
"logger": {},
"binary_sensor": {},
},
)
assert "binary_sensor" in hass.config.components
assert "sensor" in hass.config.components
assert "root" in hass.config.components
assert "first_dep" in hass.config.components
assert "second_dep" in hass.config.components
assert order[0] == "logger"
# base platforms (sensor/binary_sensor) should be setup before other integrations
# but after logger integrations. The order of base platforms is not guaranteed,
# only that they are setup before other integrations.
assert set(order[1:3]) == {"sensor", "binary_sensor"}
assert order[3:] == ["root", "first_dep", "second_dep"]

View File

@ -2361,13 +2361,12 @@ def test_extract_platform_integrations() -> None:
]
)
assert config_util.extract_platform_integrations(config, {"zone"}) == {
"hello",
"hello 2",
"zone": {"hello", "hello 2"}
}
assert config_util.extract_platform_integrations(config, {"zonex"}) == set()
assert config_util.extract_platform_integrations(config, {"zoney"}) == set()
assert config_util.extract_platform_integrations(config, {"zonex"}) == {}
assert config_util.extract_platform_integrations(config, {"zoney"}) == {}
assert config_util.extract_platform_integrations(
config, {"zone", "not_valid", "notzone"}
) == {"hello", "hello 2", "nothello"}
assert config_util.extract_platform_integrations(config, {"zoneq"}) == set()
assert config_util.extract_platform_integrations(config, {"zoneempty"}) == set()
) == {"zone": {"hello 2", "hello"}, "notzone": {"nothello"}}
assert config_util.extract_platform_integrations(config, {"zoneq"}) == {}
assert config_util.extract_platform_integrations(config, {"zoneempty"}) == {}