Fix buffer corruption in API message encoding with very verbose logging (#9249)

This commit is contained in:
J. Nick Koston 2025-06-28 15:04:42 -05:00 committed by GitHub
parent 30f61b26ff
commit 13d4823db6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 210 additions and 5 deletions

View File

@ -94,6 +94,19 @@ APIConnection::~APIConnection() {
#endif
}
#ifdef HAS_PROTO_MESSAGE_DUMP
void APIConnection::log_batch_item_(const DeferredBatch::BatchItem &item) {
  // Log a batched message for very-verbose debugging WITHOUT encoding it into
  // the shared buffer. Setting log_only_mode_ makes encode_message_to_buffer
  // short-circuit to log_send_message_ and return immediately, so the buffer
  // that was already sent (or is being sent) is never touched.
  this->log_only_mode_ = true;
  // Invoke the stored creator: it rebuilds the message and routes it through
  // encode_message_to_buffer, which in log-only mode just dumps it to the log.
  // max() as the remaining size means "never reject for being too large".
  item.creator(item.entity, this, std::numeric_limits<uint16_t>::max(), true, item.message_type);
  // Restore normal encode behavior for subsequent messages.
  this->log_only_mode_ = false;
}
#endif
void APIConnection::loop() {
if (this->next_close_) {
// requested a disconnect
@ -249,6 +262,14 @@ void APIConnection::on_disconnect_response(const DisconnectResponse &value) {
// including header and footer overhead. Returns 0 if the message doesn't fit.
uint16_t APIConnection::encode_message_to_buffer(ProtoMessage &msg, uint16_t message_type, APIConnection *conn,
uint32_t remaining_size, bool is_single) {
#ifdef HAS_PROTO_MESSAGE_DUMP
// If in log-only mode, just log and return
if (conn->log_only_mode_) {
conn->log_send_message_(msg.message_name(), msg.dump());
return 1; // Return non-zero to indicate "success" for logging
}
#endif
// Calculate size
uint32_t calculated_size = 0;
msg.calculate_size(calculated_size);
@ -276,11 +297,6 @@ uint16_t APIConnection::encode_message_to_buffer(ProtoMessage &msg, uint16_t mes
// Encode directly into buffer
msg.encode(buffer);
#ifdef HAS_PROTO_MESSAGE_DUMP
// Log the message for VV debugging
conn->log_send_message_(msg.message_name(), msg.dump());
#endif
// Calculate actual encoded size (not including header that was already added)
size_t actual_payload_size = shared_buf.size() - size_before_encode;
@ -1891,6 +1907,15 @@ void APIConnection::process_batch_() {
}
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Log messages after send attempt for VV debugging
// It's safe to use the buffer for logging at this point regardless of send result
for (size_t i = 0; i < items_processed; i++) {
const auto &item = this->deferred_batch_.items[i];
this->log_batch_item_(item);
}
#endif
// Handle remaining items more efficiently
if (items_processed < this->deferred_batch_.items.size()) {
// Remove processed items from the beginning

View File

@ -470,6 +470,10 @@ class APIConnection : public APIServerConnection {
bool sent_ping_{false};
bool service_call_subscription_{false};
bool next_close_ = false;
#ifdef HAS_PROTO_MESSAGE_DUMP
// When true, encode_message_to_buffer will only log, not encode
bool log_only_mode_{false};
#endif
uint8_t ping_retries_{0};
// 8 bytes used, no padding needed
@ -627,6 +631,10 @@ class APIConnection : public APIServerConnection {
// State for batch buffer allocation
bool batch_first_message_{false};
#ifdef HAS_PROTO_MESSAGE_DUMP
void log_batch_item_(const DeferredBatch::BatchItem &item);
#endif
// Helper function to schedule a deferred message with known message type
bool schedule_message_(EntityBase *entity, MessageCreator creator, uint16_t message_type) {
this->deferred_batch_.add_item(entity, std::move(creator), message_type);

View File

@ -0,0 +1,89 @@
# ESPHome fixture for the VERY_VERBOSE logging integration test.
# Reconstructed with proper YAML indentation (the flattened form was not
# valid ESPHome configuration).
esphome:
  name: vv-logging-test

host:

api:

logger:
  level: VERY_VERBOSE
  # Enable VV logging for API components where the issue occurs
  logs:
    api.connection: VERY_VERBOSE
    api.service: VERY_VERBOSE
    api.proto: VERY_VERBOSE
    sensor: VERY_VERBOSE

# Create many sensors that update frequently to generate API traffic
# This will cause many messages to be batched and sent, triggering the
# code path where VV logging could cause buffer corruption
sensor:
  - platform: template
    name: "Test Sensor 1"
    lambda: 'return millis() / 1000.0;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 2"
    lambda: 'return (millis() / 1000.0) + 10;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 3"
    lambda: 'return (millis() / 1000.0) + 20;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 4"
    lambda: 'return (millis() / 1000.0) + 30;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 5"
    lambda: 'return (millis() / 1000.0) + 40;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 6"
    lambda: 'return (millis() / 1000.0) + 50;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 7"
    lambda: 'return (millis() / 1000.0) + 60;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 8"
    lambda: 'return (millis() / 1000.0) + 70;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 9"
    lambda: 'return (millis() / 1000.0) + 80;'
    update_interval: 50ms
    unit_of_measurement: "s"
  - platform: template
    name: "Test Sensor 10"
    lambda: 'return (millis() / 1000.0) + 90;'
    update_interval: 50ms
    unit_of_measurement: "s"

# Add some binary sensors too for variety
binary_sensor:
  - platform: template
    name: "Test Binary 1"
    lambda: 'return (millis() / 1000) % 2 == 0;'
  - platform: template
    name: "Test Binary 2"
    lambda: 'return (millis() / 1000) % 3 == 0;'

View File

@ -0,0 +1,83 @@
"""Integration test for API with VERY_VERBOSE logging to verify no buffer corruption."""
from __future__ import annotations
import asyncio
from typing import Any
from aioesphomeapi import LogLevel
import pytest
from .types import APIClientConnectedFactory, RunCompiledFunction
@pytest.mark.asyncio
async def test_api_vv_logging(
    yaml_config: str,
    run_compiled: RunCompiledFunction,
    api_client_connected: APIClientConnectedFactory,
) -> None:
    """Test that VERY_VERBOSE logging doesn't cause buffer corruption with API messages.

    Subscribes to VV logs (the code path that previously corrupted the shared
    encode buffer) while many template sensors stream state updates, then
    asserts that both log lines and sensor updates keep flowing with no
    error/assert text appearing in the device log.
    """
    # Track that we're receiving VV log messages and sensor updates
    vv_logs_received = 0
    sensor_updates_received = 0
    errors_detected: list[str] = []

    def on_log(msg: Any) -> None:
        """Capture log messages and tally VV lines / error text."""
        nonlocal vv_logs_received
        # msg is a SubscribeLogsResponse object with a 'message' attribute;
        # the message field is always bytes.
        message_text = msg.message.decode("utf-8", errors="replace")
        # Only count VV logs specifically
        if "[VV]" in message_text:
            vv_logs_received += 1
        # Check for assertion or error messages
        if "assert" in message_text.lower() or "error" in message_text.lower():
            errors_detected.append(message_text)

    # Write, compile and run the ESPHome device
    async with run_compiled(yaml_config), api_client_connected() as client:
        # Subscribe to VERY_VERBOSE logs - this enables the code path that
        # could cause corruption
        client.subscribe_logs(on_log, log_level=LogLevel.LOG_LEVEL_VERY_VERBOSE)

        # Wait for device to be ready
        device_info = await client.device_info()
        assert device_info is not None
        assert device_info.name == "vv-logging-test"

        # Subscribe to sensor states
        states = {}

        def on_state(state) -> None:
            """Count every state update and remember the latest per key."""
            nonlocal sensor_updates_received
            sensor_updates_received += 1
            states[state.key] = state

        client.subscribe_states(on_state)

        # List entities to find our test sensors
        entity_info, _ = await client.list_entities_services()

        # Count sensors (template sensors expose unit_of_measurement)
        sensor_count = sum(1 for e in entity_info if hasattr(e, "unit_of_measurement"))
        assert sensor_count >= 10, f"Expected at least 10 sensors, got {sensor_count}"

        # Wait for sensor updates to flow with VV logging active.
        # The sensors update every 50ms, so we should get many updates.
        await asyncio.sleep(0.25)

        # Verify we received both VV logs and sensor updates
        assert vv_logs_received > 0, "Expected to receive VERY_VERBOSE log messages"
        assert sensor_updates_received > 10, (
            f"Expected many sensor updates, got {sensor_updates_received}"
        )

        # Check for any errors
        if errors_detected:
            pytest.fail(f"Errors detected during test: {errors_detected}")
        # The test passes if we didn't hit any assertions or buffer corruption