From 95a08579f6a864473e13d8a03118045b31959d3c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 18 Jul 2025 09:20:08 -1000 Subject: [PATCH 1/5] Fix AsyncTCP version mismatch between platformio.ini and async_tcp component (#9676) --- .clang-tidy.hash | 2 +- platformio.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-tidy.hash b/.clang-tidy.hash index 18be8d78a9..50a7fa9709 100644 --- a/.clang-tidy.hash +++ b/.clang-tidy.hash @@ -1 +1 @@ -07f621354fe1350ba51953c80273cd44a04aa44f15cc30bd7b8fe2a641427b7a +0c2acbc16bfb7d63571dbe7042f94f683be25e4ca8a0f158a960a94adac4b931 diff --git a/platformio.ini b/platformio.ini index 8fcc578103..7fb301c08b 100644 --- a/platformio.ini +++ b/platformio.ini @@ -138,7 +138,7 @@ lib_deps = WiFi ; wifi,web_server_base,ethernet (Arduino built-in) Update ; ota,web_server_base (Arduino built-in) ${common:arduino.lib_deps} - ESP32Async/AsyncTCP@3.4.4 ; async_tcp + ESP32Async/AsyncTCP@3.4.5 ; async_tcp NetworkClientSecure ; http_request,nextion (Arduino built-in) HTTPClient ; http_request,nextion (Arduino built-in) ESPmDNS ; mdns (Arduino built-in) From 3f8494bf8fe40743d63379b3f0aa5f6012fcb5b6 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 18 Jul 2025 20:21:36 +0100 Subject: [PATCH 2/5] [speaker] Media player's pipeline properly returns playing state near end of file (#9668) --- .../speaker/media_player/audio_pipeline.cpp | 16 ++++++++++++++-- .../speaker/media_player/audio_pipeline.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/esphome/components/speaker/media_player/audio_pipeline.cpp b/esphome/components/speaker/media_player/audio_pipeline.cpp index 333a076bec..8811ea1644 100644 --- a/esphome/components/speaker/media_player/audio_pipeline.cpp +++ b/esphome/components/speaker/media_player/audio_pipeline.cpp @@ -200,7 +200,7 @@ AudioPipelineState AudioPipeline::process_state() { if ((this->read_task_handle_ != nullptr) || (this->decode_task_handle_ != nullptr)) { this->delete_tasks_(); if (this->hard_stop_) { - // Stop command was sent, so immediately end of the playback + // Stop command was sent, so immediately end the playback this->speaker_->stop(); this->hard_stop_ = false; } else { @@ -210,13 +210,25 @@ AudioPipelineState AudioPipeline::process_state() { } } this->is_playing_ = false; - return AudioPipelineState::STOPPED; + if (!this->speaker_->is_running()) { + return AudioPipelineState::STOPPED; + } else { + this->is_finishing_ = true; + } } if (this->pause_state_) { return AudioPipelineState::PAUSED; } + if (this->is_finishing_) { + if (!this->speaker_->is_running()) { + this->is_finishing_ = false; + } else { + return AudioPipelineState::PLAYING; + } + } + if ((this->read_task_handle_ == nullptr) && (this->decode_task_handle_ == nullptr)) { // No tasks are running, so the pipeline is stopped. xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP); diff --git a/esphome/components/speaker/media_player/audio_pipeline.h b/esphome/components/speaker/media_player/audio_pipeline.h index 722d9cbb2a..98f43fda6e 100644 --- a/esphome/components/speaker/media_player/audio_pipeline.h +++ b/esphome/components/speaker/media_player/audio_pipeline.h @@ -114,6 +114,7 @@ class AudioPipeline { bool hard_stop_{false}; bool is_playing_{false}; + bool is_finishing_{false}; bool pause_state_{false}; bool task_stack_in_psram_; From cb8d9dca2a9758415485275064f31f2dbda29ee5 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Fri, 18 Jul 2025 20:24:55 +0100 Subject: [PATCH 3/5] [voice_assistant] Use media player callbacks to track TTS response status (#9670) --- .../voice_assistant/voice_assistant.cpp | 72 +++++++++++++------ .../voice_assistant/voice_assistant.h | 13 +++- 2 files changed, 63 insertions(+), 22 deletions(-) diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index a8cb22ccc9..3c69dafa43 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -35,6 +35,27 @@ void VoiceAssistant::setup() { temp_ring_buffer->write((void *) data.data(), data.size()); } }); + +#ifdef USE_MEDIA_PLAYER + if (this->media_player_ != nullptr) { + this->media_player_->add_on_state_callback([this]() { + switch (this->media_player_->state) { + case media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING: + if (this->media_player_response_state_ == MediaPlayerResponseState::URL_SENT) { + // State changed to announcing after receiving the url + this->media_player_response_state_ = MediaPlayerResponseState::PLAYING; + } + break; + default: + if (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING) { + // No longer announcing the TTS response + this->media_player_response_state_ = MediaPlayerResponseState::FINISHED; + } + break; + } + }); + } +#endif } float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; } @@ -223,6 +244,13 @@ void VoiceAssistant::loop() { msg.wake_word_phrase = this->wake_word_; this->wake_word_ = ""; + // Reset media player state tracking +#ifdef USE_MEDIA_PLAYER + if (this->media_player_ != nullptr) { + this->media_player_response_state_ = MediaPlayerResponseState::IDLE; + } +#endif + if (this->api_client_ == nullptr || !this->api_client_->send_message(msg, api::VoiceAssistantRequest::MESSAGE_TYPE)) { ESP_LOGW(TAG, "Could not request start"); @@ -315,17 +343,10 @@ void VoiceAssistant::loop() { #endif #ifdef USE_MEDIA_PLAYER if (this->media_player_ != nullptr) { - playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING); + playing = (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING); - if (playing && this->media_player_wait_for_announcement_start_) { - // Announcement has started playing, wait for it to finish - this->media_player_wait_for_announcement_start_ = false; - this->media_player_wait_for_announcement_end_ = true; - } - - if (!playing && this->media_player_wait_for_announcement_end_) { - // Announcement has finished playing - this->media_player_wait_for_announcement_end_ = false; + if (this->media_player_response_state_ == MediaPlayerResponseState::FINISHED) { + this->media_player_response_state_ = MediaPlayerResponseState::IDLE; this->cancel_timeout("playing"); ESP_LOGD(TAG, "Announcement finished playing"); this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED); @@ -556,7 +577,7 @@ void VoiceAssistant::request_stop() { break; case State::AWAITING_RESPONSE: this->signal_stop_(); - // Fallthrough intended to stop a streaming TTS announcement that has potentially started + break; case State::STREAMING_RESPONSE: #ifdef USE_MEDIA_PLAYER // Stop any ongoing media player announcement @@ -566,6 +587,10 @@ void VoiceAssistant::request_stop() { .set_announcement(true) .perform(); } + if (this->started_streaming_tts_) { + // Haven't reached the TTS_END stage, so send the stop signal to HA. + this->signal_stop_(); + } #endif break; case State::RESPONSE_FINISHED: @@ -649,13 +674,16 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { if (this->media_player_ != nullptr) { for (const auto &arg : msg.data) { if ((arg.name == "tts_start_streaming") && (arg.value == "1") && !this->tts_response_url_.empty()) { + this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT; + this->media_player_->make_call().set_media_url(this->tts_response_url_).set_announcement(true).perform(); - this->media_player_wait_for_announcement_start_ = true; - this->media_player_wait_for_announcement_end_ = false; this->started_streaming_tts_ = true; + this->start_playback_timeout_(); + tts_url_for_trigger = this->tts_response_url_; this->tts_response_url_.clear(); // Reset streaming URL + this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE); } } } @@ -714,18 +742,22 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { this->defer([this, url]() { #ifdef USE_MEDIA_PLAYER if ((this->media_player_ != nullptr) && (!this->started_streaming_tts_)) { + this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT; + this->media_player_->make_call().set_media_url(url).set_announcement(true).perform(); - this->media_player_wait_for_announcement_start_ = true; - this->media_player_wait_for_announcement_end_ = false; - // Start the playback timeout, as the media player state isn't immediately updated this->start_playback_timeout_(); } + this->started_streaming_tts_ = false; // Helps indicate reaching the TTS_END stage #endif this->tts_end_trigger_->trigger(url); }); State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE; - this->set_state_(new_state, new_state); + if (new_state != this->state_) { + // Don't needlessly change the state. The intent progress stage may have already changed the state to streaming + // response. + this->set_state_(new_state, new_state); + } break; } case api::enums::VOICE_ASSISTANT_RUN_END: { @@ -876,6 +908,9 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) #ifdef USE_MEDIA_PLAYER if (this->media_player_ != nullptr) { this->tts_start_trigger_->trigger(msg.text); + + this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT; + if (!msg.preannounce_media_id.empty()) { this->media_player_->make_call().set_media_url(msg.preannounce_media_id).set_announcement(true).perform(); } @@ -887,9 +922,6 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) .perform(); this->continue_conversation_ = msg.start_conversation; - this->media_player_wait_for_announcement_start_ = true; - this->media_player_wait_for_announcement_end_ = false; - // Start the playback timeout, as the media player state isn't immediately updated this->start_playback_timeout_(); if (this->continuous_) { diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index 2424ea6052..95f77dbf09 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -90,6 +90,15 @@ struct Configuration { uint32_t max_active_wake_words; }; +#ifdef USE_MEDIA_PLAYER +enum class MediaPlayerResponseState { + IDLE, + URL_SENT, + PLAYING, + FINISHED, +}; +#endif + class VoiceAssistant : public Component { public: VoiceAssistant(); @@ -272,8 +281,8 @@ class VoiceAssistant : public Component { media_player::MediaPlayer *media_player_{nullptr}; std::string tts_response_url_{""}; bool started_streaming_tts_{false}; - bool media_player_wait_for_announcement_start_{false}; - bool media_player_wait_for_announcement_end_{false}; + + MediaPlayerResponseState media_player_response_state_{MediaPlayerResponseState::IDLE}; #endif bool local_output_{false}; From 08407706aa58fd3874be49e058c20dd8f6e65825 Mon Sep 17 00:00:00 2001 From: "@RubenKelevra" Date: Fri, 18 Jul 2025 21:28:13 +0200 Subject: [PATCH 4/5] esp32cam: add fb location config option (#9630) --- esphome/components/esp32_camera/__init__.py | 12 ++++++++++++ esphome/components/esp32_camera/esp32_camera.cpp | 9 +++++++-- esphome/components/esp32_camera/esp32_camera.h | 1 + tests/components/esp32_camera/common.yaml | 1 + 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/esphome/components/esp32_camera/__init__.py b/esphome/components/esp32_camera/__init__.py index a99ec34087..43e71df432 100644 --- a/esphome/components/esp32_camera/__init__.py +++ b/esphome/components/esp32_camera/__init__.py @@ -119,6 +119,12 @@ ENUM_SPECIAL_EFFECT = { "SEPIA": ESP32SpecialEffect.ESP32_SPECIAL_EFFECT_SEPIA, } +camera_fb_location_t = cg.global_ns.enum("camera_fb_location_t") +ENUM_FB_LOCATION = { + "PSRAM": cg.global_ns.CAMERA_FB_IN_PSRAM, + "DRAM": cg.global_ns.CAMERA_FB_IN_DRAM, +} + # pin assignment CONF_HREF_PIN = "href_pin" CONF_PIXEL_CLOCK_PIN = "pixel_clock_pin" @@ -149,6 +155,7 @@ CONF_MAX_FRAMERATE = "max_framerate" CONF_IDLE_FRAMERATE = "idle_framerate" # frame buffer CONF_FRAME_BUFFER_COUNT = "frame_buffer_count" +CONF_FRAME_BUFFER_LOCATION = "frame_buffer_location" # stream trigger CONF_ON_STREAM_START = "on_stream_start" @@ -230,6 +237,9 @@ CONFIG_SCHEMA = cv.All( cv.framerate, cv.Range(min=0, max=1) ), cv.Optional(CONF_FRAME_BUFFER_COUNT, default=1): cv.int_range(min=1, max=2), + cv.Optional(CONF_FRAME_BUFFER_LOCATION, default="PSRAM"): cv.enum( + ENUM_FB_LOCATION, upper=True + ), cv.Optional(CONF_ON_STREAM_START): automation.validate_automation( { cv.GenerateID(CONF_TRIGGER_ID): cv.declare_id( @@ -301,6 +311,7 @@ SETTERS = { CONF_WB_MODE: "set_wb_mode", # test pattern CONF_TEST_PATTERN: "set_test_pattern", + CONF_FRAME_BUFFER_LOCATION: "set_frame_buffer_location", } @@ -328,6 +339,7 @@ async def to_code(config): else: cg.add(var.set_idle_update_interval(1000 / config[CONF_IDLE_FRAMERATE])) cg.add(var.set_frame_buffer_count(config[CONF_FRAME_BUFFER_COUNT])) + cg.add(var.set_frame_buffer_location(config[CONF_FRAME_BUFFER_LOCATION])) cg.add(var.set_frame_size(config[CONF_RESOLUTION])) cg.add_define("USE_CAMERA") diff --git a/esphome/components/esp32_camera/esp32_camera.cpp b/esphome/components/esp32_camera/esp32_camera.cpp index eadb8a4408..38bd8d5822 100644 --- a/esphome/components/esp32_camera/esp32_camera.cpp +++ b/esphome/components/esp32_camera/esp32_camera.cpp @@ -133,6 +133,7 @@ void ESP32Camera::dump_config() { ESP_LOGCONFIG(TAG, " JPEG Quality: %u\n" " Framebuffer Count: %u\n" + " Framebuffer Location: %s\n" " Contrast: %d\n" " Brightness: %d\n" " Saturation: %d\n" @@ -140,8 +141,9 @@ void ESP32Camera::dump_config() { " Horizontal Mirror: %s\n" " Special Effect: %u\n" " White Balance Mode: %u", - st.quality, conf.fb_count, st.contrast, st.brightness, st.saturation, ONOFF(st.vflip), - ONOFF(st.hmirror), st.special_effect, st.wb_mode); + st.quality, conf.fb_count, this->config_.fb_location == CAMERA_FB_IN_PSRAM ? "PSRAM" : "DRAM", + st.contrast, st.brightness, st.saturation, ONOFF(st.vflip), ONOFF(st.hmirror), st.special_effect, + st.wb_mode); // ESP_LOGCONFIG(TAG, " Auto White Balance: %u", st.awb); // ESP_LOGCONFIG(TAG, " Auto White Balance Gain: %u", st.awb_gain); ESP_LOGCONFIG(TAG, @@ -350,6 +352,9 @@ void ESP32Camera::set_frame_buffer_count(uint8_t fb_count) { this->config_.fb_count = fb_count; this->set_frame_buffer_mode(fb_count > 1 ? CAMERA_GRAB_LATEST : CAMERA_GRAB_WHEN_EMPTY); } +void ESP32Camera::set_frame_buffer_location(camera_fb_location_t fb_location) { + this->config_.fb_location = fb_location; +} /* ---------------- public API (specific) ---------------- */ void ESP32Camera::add_image_callback(std::function)> &&callback) { diff --git a/esphome/components/esp32_camera/esp32_camera.h b/esphome/components/esp32_camera/esp32_camera.h index 8ce3faf039..0e7f7c0ea6 100644 --- a/esphome/components/esp32_camera/esp32_camera.h +++ b/esphome/components/esp32_camera/esp32_camera.h @@ -152,6 +152,7 @@ class ESP32Camera : public camera::Camera { /* -- frame buffer */ void set_frame_buffer_mode(camera_grab_mode_t mode); void set_frame_buffer_count(uint8_t fb_count); + void set_frame_buffer_location(camera_fb_location_t fb_location); /* public API (derivated) */ void setup() override; diff --git a/tests/components/esp32_camera/common.yaml b/tests/components/esp32_camera/common.yaml index 2f5f792f1c..64f75c699a 100644 --- a/tests/components/esp32_camera/common.yaml +++ b/tests/components/esp32_camera/common.yaml @@ -22,6 +22,7 @@ esp32_camera: power_down_pin: 1 resolution: 640x480 jpeg_quality: 10 + frame_buffer_location: PSRAM on_image: then: - lambda: |- From 60350e8abd3489b3cc7af6e092e88d85b12c908d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:08:29 +0000 Subject: [PATCH 5/5] Bump aioesphomeapi from 37.0.0 to 37.0.1 (#9685) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index acfa31ddca..4d94ce5557 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ platformio==6.1.18 # When updating platformio, also update /docker/Dockerfile esptool==4.9.0 click==8.1.7 esphome-dashboard==20250514.0 -aioesphomeapi==37.0.0 +aioesphomeapi==37.0.1 zeroconf==0.147.0 puremagic==1.30 ruamel.yaml==0.18.14 # dashboard_import