Compare commits

...

3 Commits

Author SHA1 Message Date
Stefan Agner
9342456b34 Scale progress by fraction of layers that have reported
When pulling Docker images with many layers, tiny layers that complete
immediately would show inflated progress (e.g., 70%) even when most
layers haven't started reporting yet. This made the UI jump to 70%
quickly, then appear stuck during actual download.

The fix scales progress by the fraction of layers that have reported:
- If 2/25 layers report at 70%, progress shows ~5.6% instead of 70%
- As more layers report, progress increases proportionally
- When all layers have reported, no scaling is applied

This ensures progress accurately reflects overall download status rather
than being dominated by a few tiny layers that complete first.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 19:54:39 +01:00
Stefan Agner
2080a2719e Add missing test fixture json 2025-11-27 17:19:58 +01:00
Stefan Agner
6820dbb4d2 Improve progress reporting when layers are locally present
With containerd snapshotter, some layers skip the "Downloading" phase
entirely and go directly from "Pulling fs layer" to "Download complete".
These layers only receive a placeholder extra={current: 1, total: 1}
instead of actual size information.

The previous fix in #6357 still required ALL layers to have extra before
calculating progress, which caused the UI to jump from 0% to 70%+ when
the first real layer reported after cached layers completed.

This change improves progress calculation by:
- Separating "real" layers (with actual size > 1 byte) from "placeholder"
  layers (cached/small layers with size = 1)
- Calculating weighted progress only from real layers
- Not reporting progress until at least one real layer has size info
- Correctly tracking stage when placeholder layers are still extracting

Tested with real-world pull events from a Home Assistant Core update
that had 12 layers where 5 skipped downloading entirely.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 16:28:14 +01:00
3 changed files with 5815 additions and 40 deletions

View File

@@ -312,24 +312,44 @@ class DockerInterface(JobGroup, ABC):
and job.name == "Pulling container image layer"
]
# First set the total bytes to be downloaded/extracted on the main job
if not install_job.extra:
total = 0
for job in layer_jobs:
if not job.extra:
return
total += job.extra["total"]
install_job.extra = {"total": total}
else:
total = install_job.extra["total"]
# Calculate total from layers that have reported size info
# With containerd snapshotter, some layers skip "Downloading" and go directly to
# "Download complete", so we can't wait for all layers to have extra before reporting progress
layers_with_extra = [
job for job in layer_jobs if job.extra and job.extra.get("total")
]
if not layers_with_extra:
return
# Then determine total progress based on progress of each sub-job, factoring in size of each compared to total
# Sum up total bytes. Layers that skip downloading get placeholder extra={1,1}
# which doesn't represent actual size. Separate "real" layers from placeholders.
# Filter guarantees job.extra is not None and has "total" key
real_layers = [
job for job in layers_with_extra if cast(dict, job.extra)["total"] > 1
]
placeholder_layers = [
job for job in layers_with_extra if cast(dict, job.extra)["total"] == 1
]
# If we only have placeholder layers (no real size info yet), don't report progress
# This prevents tiny cached layers from showing inflated progress before
# the actual download sizes are known
if not real_layers:
return
total = sum(cast(dict, job.extra)["total"] for job in real_layers)
if total == 0:
return
# Update install_job.extra with current total (may increase as more layers report)
install_job.extra = {"total": total}
# Calculate progress based on layers that have real size info
# Placeholder layers (skipped downloads) count as complete but don't affect weighted progress
progress = 0.0
stage = PullImageLayerStage.PULL_COMPLETE
for job in layer_jobs:
if not job.extra or not job.extra.get("total"):
return
progress += job.progress * (job.extra["total"] / total)
for job in real_layers:
progress += job.progress * (cast(dict, job.extra)["total"] / total)
job_stage = PullImageLayerStage.from_status(cast(str, job.stage))
if job_stage < PullImageLayerStage.EXTRACTING:
@@ -340,6 +360,28 @@ class DockerInterface(JobGroup, ABC):
):
stage = PullImageLayerStage.EXTRACTING
# Check if any layers are still pending (no extra yet)
# If so, we're still in downloading phase even if all layers_with_extra are done
layers_pending = len(layer_jobs) - len(layers_with_extra)
if layers_pending > 0:
# Scale progress to account for unreported layers
# This prevents tiny layers that complete first from showing inflated progress
# e.g., if 2/25 layers reported at 70%, actual progress is ~70 * 2/25 = 5.6%
layers_fraction = len(layers_with_extra) / len(layer_jobs)
progress = progress * layers_fraction
if stage == PullImageLayerStage.PULL_COMPLETE:
stage = PullImageLayerStage.DOWNLOADING
# Also check if all placeholders are done but we're waiting for real layers
if placeholder_layers and stage == PullImageLayerStage.PULL_COMPLETE:
# All real layers are done, but check if placeholders are still extracting
for job in placeholder_layers:
job_stage = PullImageLayerStage.from_status(cast(str, job.stage))
if job_stage < PullImageLayerStage.PULL_COMPLETE:
stage = PullImageLayerStage.EXTRACTING
break
# Ensure progress is 100 at this point to prevent float drift
if stage == PullImageLayerStage.PULL_COMPLETE:
progress = 100

View File

@@ -709,11 +709,13 @@ async def test_install_progress_handles_layers_skipping_download(
await install_task
await event.wait()
# First update from layer download should have rather low progress ((260937/25445459) / 2 ~ 0.5%)
assert install_job_snapshots[0]["progress"] < 1
# First update from layer download should have rather low progress ((260937/25371463) ~= 1%)
assert install_job_snapshots[0]["progress"] < 2
# Total 8 events should lead to a progress update on the install job
assert len(install_job_snapshots) == 8
# Total 7 events should lead to a progress update on the install job:
# 3 Downloading events + Download complete (70%) + Extracting + Pull complete (100%) + stage change
# Note: The small placeholder layer ({1,1}) is excluded from progress calculation
assert len(install_job_snapshots) == 7
# Job should complete successfully
assert job.done is True
@@ -843,25 +845,107 @@ async def test_install_progress_containerd_snapshot(
},
}
assert [c.args[0] for c in ha_ws_client.async_send_command.call_args_list] == [
# During downloading we get continuous progress updates from download status
job_event(0),
job_event(3.4),
job_event(8.5),
job_event(10.2),
job_event(15.3),
job_event(18.8),
job_event(29.0),
job_event(35.8),
job_event(42.6),
job_event(49.5),
job_event(56.0),
job_event(62.8),
# Downloading phase is considered 70% of total. After we only get one update
# per image downloaded when extraction is finished. It uses the total size
# received during downloading to determine percent complete then.
job_event(70.0),
job_event(84.8),
job_event(100),
job_event(100, True),
# Get progress values from the events
job_events = [
c.args[0]
for c in ha_ws_client.async_send_command.call_args_list
if c.args[0].get("data", {}).get("event") == WSEvent.JOB
and c.args[0].get("data", {}).get("data", {}).get("name")
== "mock_docker_interface_install"
]
progress_values = [e["data"]["data"]["progress"] for e in job_events]
# Should have multiple progress updates (not just 0 and 100)
assert len(progress_values) >= 10, (
f"Expected >=10 progress updates, got {len(progress_values)}"
)
# Progress should be monotonically increasing
for i in range(1, len(progress_values)):
assert progress_values[i] >= progress_values[i - 1], (
f"Progress decreased at index {i}: {progress_values[i - 1]} -> {progress_values[i]}"
)
# Should start at 0 and end at 100
assert progress_values[0] == 0
assert progress_values[-1] == 100
# Should have progress values in the downloading phase (< 70%)
# Note: with layer scaling, early progress may be lower than before
downloading_progress = [p for p in progress_values if 0 < p < 70]
assert len(downloading_progress) > 0, (
"Expected progress updates during downloading phase"
)
async def test_install_progress_containerd_snapshotter_real_world(
coresys: CoreSys, ha_ws_client: AsyncMock
):
"""Test install handles real-world containerd snapshotter events.
This test uses real pull events captured from a Home Assistant Core update
where some layers skip the Downloading phase entirely (going directly from
"Pulling fs layer" to "Download complete"). This causes the bug where progress
jumps from 0 to 100 without intermediate updates.
Root cause: _update_install_job_status() returns early if ANY layer has
extra=None. Layers that skip Downloading don't get extra until Download complete,
so progress cannot be calculated until ALL layers reach Download complete.
"""
coresys.core.set_state(CoreState.RUNNING)
class TestDockerInterface(DockerInterface):
"""Test interface for events."""
@property
def name(self) -> str:
"""Name of test interface."""
return "test_interface"
@Job(
name="mock_docker_interface_install_realworld",
child_job_syncs=[
ChildJobSyncFilter("docker_interface_install", progress_allocation=1.0)
],
)
async def mock_install(self) -> None:
"""Mock install."""
await super().install(
AwesomeVersion("1.2.3"), image="test", arch=CpuArch.I386
)
# Real-world fixture: 12 layers, 262 Downloading events
# Some layers skip Downloading entirely (small layers with containerd snapshotter)
logs = load_json_fixture("docker_pull_image_log_containerd_snapshotter_real.json")
coresys.docker.images.pull.return_value = AsyncIterator(logs)
test_docker_interface = TestDockerInterface(coresys)
with patch.object(Supervisor, "arch", PropertyMock(return_value="i386")):
await test_docker_interface.mock_install()
await asyncio.sleep(1)
# Get progress events for the parent job (what UI sees)
job_events = [
c.args[0]
for c in ha_ws_client.async_send_command.call_args_list
if c.args[0].get("data", {}).get("event") == WSEvent.JOB
and c.args[0].get("data", {}).get("data", {}).get("name")
== "mock_docker_interface_install_realworld"
]
progress_values = [e["data"]["data"]["progress"] for e in job_events]
# We should have intermediate progress updates, not just 0 and 100
assert len(progress_values) > 3, (
f"BUG: Progress jumped 0->100 without intermediate updates. "
f"Got {len(progress_values)} updates: {progress_values}. "
f"Expected intermediate progress during the 262 Downloading events."
)
# Progress should be monotonically increasing
for i in range(1, len(progress_values)):
assert progress_values[i] >= progress_values[i - 1]
# Should see progress in downloading phase (0-70%)
downloading_progress = [p for p in progress_values if 0 < p < 70]
assert len(downloading_progress) > 0

File diff suppressed because it is too large Load Diff