Improve handling of timeouts in tests (#2890)

* Improve handling of timeouts in tests Make timeout handling in tests more transparent. Added a custom shell driver that allows to define global timeout for commands in the config file, and replaced for/sleep constructs with infinite loops that will be eventually terminated by pytest-timeout plugin. Current timeouts taken from last runs on Github CI with some extra headroom. * test_supervisor_is_updated shouldn't be skipped if no update was needed * Allow more time for system startup * Allow even more time for system startup
2025-07-12 15:46:30 +00:00 · 2023-10-31 18:16:49 +01:00 · 2023-10-31 18:16:49 +01:00 · 2888ccf28e
commit 2888ccf28e
parent c33fc03fd6
7 changed files with 53 additions and 32 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -44,7 +44,7 @@ def shell_json(target, strategy) -> callable:
    strategy.transition("shell")
    shell = target.get_driver("ShellDriver")
-    def get_json_response(command, *, timeout=60) -> dict:
+    def get_json_response(command, *, timeout=None) -> dict:
        return json.loads("\n".join(shell.run_check(command, timeout=timeout)))
    return get_json_response
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@ -0,0 +1,2 @@
 [pytest]
 timeout_method = signal
--- a/tests/qemu-strategy.yaml
+++ b/tests/qemu-strategy.yaml
@ -12,11 +12,12 @@ targets:
          nic: user,model=virtio-net-pci
          disk: disk-image
          bios: bios
-      - ShellDriver:
+      - CustomTimeoutShellDriver:
          login_prompt: 'homeassistant login: '
          username: 'root'
          prompt: '# '
          login_timeout: 300
          command_timeout: 300
      - QEMUShellStrategy: {}
 tools:
--- a/tests/qemu_shell_strategy.py
+++ b/tests/qemu_shell_strategy.py
@ -4,6 +4,7 @@ import os
 import attr
 from labgrid import target_factory, step
 from labgrid.driver import ShellDriver
 from labgrid.strategy import Strategy, StrategyError
@ -13,6 +14,19 @@ class Status(enum.Enum):
    shell = 2
@target_factory.reg_driver
@attr.s(eq=False)
 class CustomTimeoutShellDriver(ShellDriver):
    """ShellDriver with a config-customizable timeout for run and run_check."""
    command_timeout = attr.ib(default=30, validator=attr.validators.instance_of(int))
    def run(self, cmd: str, *, timeout=None, codec="utf-8", decodeerrors="strict"):
        return super().run(cmd, timeout=timeout or self.command_timeout, codec=codec, decodeerrors=decodeerrors)
    def run_check(self, cmd: str, *, timeout=None, codec="utf-8", decodeerrors="strict"):
        return super().run_check(cmd, timeout=timeout or self.command_timeout, codec=codec, decodeerrors=decodeerrors)
@target_factory.reg_driver
@attr.s(eq=False)
 class QEMUShellStrategy(Strategy):
@ -20,7 +34,7 @@ class QEMUShellStrategy(Strategy):
    bindings = {
        "qemu": "QEMUDriver",
-        "shell": "ShellDriver",
+        "shell": "CustomTimeoutShellDriver",
    }
    status = attr.ib(default=Status.unknown)
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@ -1,3 +1,4 @@
 labgrid==23.0.3
 pytest==7.2.2
 pytest-dependency==0.5.1
 pytest-timeout==2.2.0
--- a/tests/smoke_test/test_basic.py
+++ b/tests/smoke_test/test_basic.py
@ -1,10 +1,14 @@
 import logging
 from time import sleep
 import pytest
 _LOGGER = logging.getLogger(__name__)
@pytest.mark.dependency()
@pytest.mark.timeout(600)
 def test_init(shell):
    def check_container_running(container_name):
        out = shell.run_check(
@ -13,35 +17,37 @@ def test_init(shell):
        return "running" in out
    # wait for important containers first
-    for _ in range(20):
+    while True:
        if check_container_running("homeassistant") and check_container_running("hassio_supervisor"):
            break
-        sleep(5)
+        sleep(1)
    # wait for system ready
-    for _ in range(20):
+    while True:
        output = "\n".join(shell.run_check("ha os info || true"))
        if "System is not ready" not in output:
            break
-        sleep(5)
+        sleep(1)
    output = shell.run_check("ha os info")
    _LOGGER.info("%s", "\n".join(output))
@pytest.mark.dependency(depends=["test_init"])
 def test_dmesg(shell):
    output = shell.run_check("dmesg")
    _LOGGER.info("%s", "\n".join(output))
@pytest.mark.dependency(depends=["test_init"])
 def test_supervisor_logs(shell):
    output = shell.run_check("ha su logs")
    _LOGGER.info("%s", "\n".join(output))
@pytest.mark.dependency(depends=["test_init"])
 def test_systemctl_status(shell):
-    output = shell.run_check(
+    output = shell.run_check("systemctl --no-pager -l status -a || true")
        "systemctl --no-pager -l status -a || true", timeout=90
    )
    _LOGGER.info("%s", "\n".join(output))
--- a/tests/supervisor_test/test_supervisor.py
+++ b/tests/supervisor_test/test_supervisor.py
@ -16,30 +16,30 @@ def stash() -> dict:
@pytest.mark.dependency()
@pytest.mark.timeout(600)
 def test_start_supervisor(shell, shell_json):
    def check_container_running(container_name):
        out = shell.run_check(f"docker container inspect -f '{{{{.State.Status}}}}' {container_name} || true")
        return "running" in out
-    for _ in range(20):
+    while True:
        if check_container_running("homeassistant") and check_container_running("hassio_supervisor"):
            break
-        sleep(5)
+        sleep(1)
    supervisor_ip = "\n".join(
        shell.run_check("docker inspect --format='{{.NetworkSettings.IPAddress}}' hassio_supervisor")
    )
-    for _ in range(20):
+    while True:
        try:
            if shell_json(f"curl -sSL http://{supervisor_ip}/supervisor/ping").get("result") == "ok":
                break
        except ExecutionError:
            pass  # avoid failure when the container is restarting
-        sleep(5)
+
-    else:
+        sleep(1)
        raise AssertionError("Supervisor did not start in time")
@pytest.mark.dependency(depends=["test_start_supervisor"])
@ -55,6 +55,7 @@ def test_check_supervisor(shell_json):
@pytest.mark.dependency(depends=["test_check_supervisor"])
@pytest.mark.timeout(300)
 def test_update_supervisor(shell_json):
    supervisor_info = shell_json("ha supervisor info --no-progress --raw-json")
    supervisor_version = supervisor_info.get("data").get("version")
@ -68,9 +69,9 @@ def test_update_supervisor(shell_json):
        else:
            assert result.get("result") == "ok", f"Supervisor update failed: {result}"
-        for _ in range(40):
+        while True:
            try:
-                supervisor_info = shell_json("ha supervisor info --no-progress --raw-json", timeout=90)
+                supervisor_info = shell_json("ha supervisor info --no-progress --raw-json")
                data = supervisor_info.get("data")
                if data and data.get("version") == data.get("version_latest"):
                    logger.info(
@ -82,14 +83,13 @@ def test_update_supervisor(shell_json):
                    break
            except ExecutionError:
                pass  # avoid failure when the container is restarting
-            sleep(5)
+
-        else:
+            sleep(1)
            raise AssertionError("Supervisor did not update in time")
-@pytest.mark.dependency(depends=["test_update_supervisor"])
+@pytest.mark.dependency(depends=["test_check_supervisor"])
 def test_supervisor_is_updated(shell_json):
-    supervisor_info = shell_json("ha supervisor info --no-progress --raw-json", timeout=90)
+    supervisor_info = shell_json("ha supervisor info --no-progress --raw-json")
    data = supervisor_info.get("data")
    assert data and data.get("version") == data.get("version_latest")
@ -98,7 +98,7 @@ def test_supervisor_is_updated(shell_json):
 def test_addon_install(shell_json):
    # install Core SSH add-on
    assert (
-        shell_json("ha addons install core_ssh --no-progress --raw-json", timeout=300).get("result") == "ok"
+        shell_json("ha addons install core_ssh --no-progress --raw-json").get("result") == "ok"
    ), "Core SSH add-on install failed"
    # check Core SSH add-on is installed
    assert (
@ -153,6 +153,7 @@ def test_addon_uninstall(shell_json):
@pytest.mark.dependency(depends=["test_supervisor_is_updated"])
@pytest.mark.timeout(450)
 def test_restart_supervisor(shell, shell_json):
    result = shell_json("ha supervisor restart --no-progress --raw-json")
    assert result.get("result") == "ok", f"Supervisor restart failed: {result}"
@ -161,24 +162,20 @@ def test_restart_supervisor(shell, shell_json):
        shell.run_check("docker inspect --format='{{.NetworkSettings.IPAddress}}' hassio_supervisor")
    )
-    for _ in range(100):
+    while True:
        try:
            if shell_json(f"curl -sSL http://{supervisor_ip}/supervisor/ping").get("result") == "ok":
                if shell_json("ha os info --no-progress --raw-json").get("result") == "ok":
                    break
        except ExecutionError:
            pass  # avoid failure when the container is restarting
-        sleep(5)
+
-    else:
+        sleep(1)
        raise AssertionError("Supervisor did not start in time")
@pytest.mark.dependency(depends=["test_create_backup"])
 def test_restore_backup(shell_json, stash):
-    result = shell_json(
+    result = shell_json(f"ha backups restore {stash.get('slug')} --addons core_ssh --no-progress --raw-json")
        f"ha backups restore {stash.get('slug')} --addons core_ssh --no-progress --raw-json",
        timeout=300,
    )
    assert result.get("result") == "ok", f"Backup restore failed: {result}"
    logger.info("Backup restore result: %s", result)