mirror of
https://github.com/esphome/esphome.git
synced 2025-07-28 14:16:40 +00:00
[voice_assistant] Use media player callbacks to track TTS response status (#9670)
This commit is contained in:
parent
8664ec0a3b
commit
84607c1255
@ -35,6 +35,27 @@ void VoiceAssistant::setup() {
|
|||||||
temp_ring_buffer->write((void *) data.data(), data.size());
|
temp_ring_buffer->write((void *) data.data(), data.size());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
#ifdef USE_MEDIA_PLAYER
|
||||||
|
if (this->media_player_ != nullptr) {
|
||||||
|
this->media_player_->add_on_state_callback([this]() {
|
||||||
|
switch (this->media_player_->state) {
|
||||||
|
case media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING:
|
||||||
|
if (this->media_player_response_state_ == MediaPlayerResponseState::URL_SENT) {
|
||||||
|
// State changed to announcing after receiving the url
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::PLAYING;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING) {
|
||||||
|
// No longer announcing the TTS response
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::FINISHED;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; }
|
float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; }
|
||||||
@ -223,6 +244,13 @@ void VoiceAssistant::loop() {
|
|||||||
msg.wake_word_phrase = this->wake_word_;
|
msg.wake_word_phrase = this->wake_word_;
|
||||||
this->wake_word_ = "";
|
this->wake_word_ = "";
|
||||||
|
|
||||||
|
// Reset media player state tracking
|
||||||
|
#ifdef USE_MEDIA_PLAYER
|
||||||
|
if (this->media_player_ != nullptr) {
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (this->api_client_ == nullptr || !this->api_client_->send_message(msg)) {
|
if (this->api_client_ == nullptr || !this->api_client_->send_message(msg)) {
|
||||||
ESP_LOGW(TAG, "Could not request start");
|
ESP_LOGW(TAG, "Could not request start");
|
||||||
this->error_trigger_->trigger("not-connected", "Could not request start");
|
this->error_trigger_->trigger("not-connected", "Could not request start");
|
||||||
@ -314,17 +342,10 @@ void VoiceAssistant::loop() {
|
|||||||
#endif
|
#endif
|
||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
if (this->media_player_ != nullptr) {
|
if (this->media_player_ != nullptr) {
|
||||||
playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING);
|
playing = (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING);
|
||||||
|
|
||||||
if (playing && this->media_player_wait_for_announcement_start_) {
|
if (this->media_player_response_state_ == MediaPlayerResponseState::FINISHED) {
|
||||||
// Announcement has started playing, wait for it to finish
|
this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
|
||||||
this->media_player_wait_for_announcement_start_ = false;
|
|
||||||
this->media_player_wait_for_announcement_end_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!playing && this->media_player_wait_for_announcement_end_) {
|
|
||||||
// Announcement has finished playing
|
|
||||||
this->media_player_wait_for_announcement_end_ = false;
|
|
||||||
this->cancel_timeout("playing");
|
this->cancel_timeout("playing");
|
||||||
ESP_LOGD(TAG, "Announcement finished playing");
|
ESP_LOGD(TAG, "Announcement finished playing");
|
||||||
this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
|
this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
|
||||||
@ -555,7 +576,7 @@ void VoiceAssistant::request_stop() {
|
|||||||
break;
|
break;
|
||||||
case State::AWAITING_RESPONSE:
|
case State::AWAITING_RESPONSE:
|
||||||
this->signal_stop_();
|
this->signal_stop_();
|
||||||
// Fallthrough intended to stop a streaming TTS announcement that has potentially started
|
break;
|
||||||
case State::STREAMING_RESPONSE:
|
case State::STREAMING_RESPONSE:
|
||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
// Stop any ongoing media player announcement
|
// Stop any ongoing media player announcement
|
||||||
@ -565,6 +586,10 @@ void VoiceAssistant::request_stop() {
|
|||||||
.set_announcement(true)
|
.set_announcement(true)
|
||||||
.perform();
|
.perform();
|
||||||
}
|
}
|
||||||
|
if (this->started_streaming_tts_) {
|
||||||
|
// Haven't reached the TTS_END stage, so send the stop signal to HA.
|
||||||
|
this->signal_stop_();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case State::RESPONSE_FINISHED:
|
case State::RESPONSE_FINISHED:
|
||||||
@ -648,13 +673,16 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
|
|||||||
if (this->media_player_ != nullptr) {
|
if (this->media_player_ != nullptr) {
|
||||||
for (const auto &arg : msg.data) {
|
for (const auto &arg : msg.data) {
|
||||||
if ((arg.name == "tts_start_streaming") && (arg.value == "1") && !this->tts_response_url_.empty()) {
|
if ((arg.name == "tts_start_streaming") && (arg.value == "1") && !this->tts_response_url_.empty()) {
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;
|
||||||
|
|
||||||
this->media_player_->make_call().set_media_url(this->tts_response_url_).set_announcement(true).perform();
|
this->media_player_->make_call().set_media_url(this->tts_response_url_).set_announcement(true).perform();
|
||||||
|
|
||||||
this->media_player_wait_for_announcement_start_ = true;
|
|
||||||
this->media_player_wait_for_announcement_end_ = false;
|
|
||||||
this->started_streaming_tts_ = true;
|
this->started_streaming_tts_ = true;
|
||||||
|
this->start_playback_timeout_();
|
||||||
|
|
||||||
tts_url_for_trigger = this->tts_response_url_;
|
tts_url_for_trigger = this->tts_response_url_;
|
||||||
this->tts_response_url_.clear(); // Reset streaming URL
|
this->tts_response_url_.clear(); // Reset streaming URL
|
||||||
|
this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -713,18 +741,22 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
|
|||||||
this->defer([this, url]() {
|
this->defer([this, url]() {
|
||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
if ((this->media_player_ != nullptr) && (!this->started_streaming_tts_)) {
|
if ((this->media_player_ != nullptr) && (!this->started_streaming_tts_)) {
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;
|
||||||
|
|
||||||
this->media_player_->make_call().set_media_url(url).set_announcement(true).perform();
|
this->media_player_->make_call().set_media_url(url).set_announcement(true).perform();
|
||||||
|
|
||||||
this->media_player_wait_for_announcement_start_ = true;
|
|
||||||
this->media_player_wait_for_announcement_end_ = false;
|
|
||||||
// Start the playback timeout, as the media player state isn't immediately updated
|
|
||||||
this->start_playback_timeout_();
|
this->start_playback_timeout_();
|
||||||
}
|
}
|
||||||
|
this->started_streaming_tts_ = false; // Helps indicate reaching the TTS_END stage
|
||||||
#endif
|
#endif
|
||||||
this->tts_end_trigger_->trigger(url);
|
this->tts_end_trigger_->trigger(url);
|
||||||
});
|
});
|
||||||
State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
|
State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
|
||||||
this->set_state_(new_state, new_state);
|
if (new_state != this->state_) {
|
||||||
|
// Don't needlessly change the state. The intent progress stage may have already changed the state to streaming
|
||||||
|
// response.
|
||||||
|
this->set_state_(new_state, new_state);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case api::enums::VOICE_ASSISTANT_RUN_END: {
|
case api::enums::VOICE_ASSISTANT_RUN_END: {
|
||||||
@ -875,6 +907,9 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
|
|||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
if (this->media_player_ != nullptr) {
|
if (this->media_player_ != nullptr) {
|
||||||
this->tts_start_trigger_->trigger(msg.text);
|
this->tts_start_trigger_->trigger(msg.text);
|
||||||
|
|
||||||
|
this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;
|
||||||
|
|
||||||
if (!msg.preannounce_media_id.empty()) {
|
if (!msg.preannounce_media_id.empty()) {
|
||||||
this->media_player_->make_call().set_media_url(msg.preannounce_media_id).set_announcement(true).perform();
|
this->media_player_->make_call().set_media_url(msg.preannounce_media_id).set_announcement(true).perform();
|
||||||
}
|
}
|
||||||
@ -886,9 +921,6 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
|
|||||||
.perform();
|
.perform();
|
||||||
this->continue_conversation_ = msg.start_conversation;
|
this->continue_conversation_ = msg.start_conversation;
|
||||||
|
|
||||||
this->media_player_wait_for_announcement_start_ = true;
|
|
||||||
this->media_player_wait_for_announcement_end_ = false;
|
|
||||||
// Start the playback timeout, as the media player state isn't immediately updated
|
|
||||||
this->start_playback_timeout_();
|
this->start_playback_timeout_();
|
||||||
|
|
||||||
if (this->continuous_) {
|
if (this->continuous_) {
|
||||||
|
@ -90,6 +90,15 @@ struct Configuration {
|
|||||||
uint32_t max_active_wake_words;
|
uint32_t max_active_wake_words;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef USE_MEDIA_PLAYER
|
||||||
|
enum class MediaPlayerResponseState {
|
||||||
|
IDLE,
|
||||||
|
URL_SENT,
|
||||||
|
PLAYING,
|
||||||
|
FINISHED,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
class VoiceAssistant : public Component {
|
class VoiceAssistant : public Component {
|
||||||
public:
|
public:
|
||||||
VoiceAssistant();
|
VoiceAssistant();
|
||||||
@ -272,8 +281,8 @@ class VoiceAssistant : public Component {
|
|||||||
media_player::MediaPlayer *media_player_{nullptr};
|
media_player::MediaPlayer *media_player_{nullptr};
|
||||||
std::string tts_response_url_{""};
|
std::string tts_response_url_{""};
|
||||||
bool started_streaming_tts_{false};
|
bool started_streaming_tts_{false};
|
||||||
bool media_player_wait_for_announcement_start_{false};
|
|
||||||
bool media_player_wait_for_announcement_end_{false};
|
MediaPlayerResponseState media_player_response_state_{MediaPlayerResponseState::IDLE};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool local_output_{false};
|
bool local_output_{false};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user