From c7f597bc753deef9c49d0edb95f7e4c79e5f170d Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Thu, 1 May 2025 06:11:09 -0500 Subject: [PATCH] [voice_assistant] voice assistant can configure enabled wake words (#8657) --- .../components/voice_assistant/__init__.py | 16 +++-- .../voice_assistant/voice_assistant.cpp | 53 ++++++++++++++ .../voice_assistant/voice_assistant.h | 20 ++++-- .../voice_assistant/common-idf.yaml | 69 +++++++++++++++++++ .../voice_assistant/test.esp32-c3-idf.yaml | 2 +- .../voice_assistant/test.esp32-idf.yaml | 2 +- 6 files changed, 150 insertions(+), 12 deletions(-) create mode 100644 tests/components/voice_assistant/common-idf.yaml diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py index ca0b6da742..b9309ab422 100644 --- a/esphome/components/voice_assistant/__init__.py +++ b/esphome/components/voice_assistant/__init__.py @@ -1,7 +1,7 @@ from esphome import automation from esphome.automation import register_action, register_condition import esphome.codegen as cg -from esphome.components import media_player, microphone, speaker +from esphome.components import media_player, micro_wake_word, microphone, speaker import esphome.config_validation as cv from esphome.const import ( CONF_ID, @@ -41,6 +41,7 @@ CONF_AUTO_GAIN = "auto_gain" CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_VOLUME_MULTIPLIER = "volume_multiplier" +CONF_MICRO_WAKE_WORD = "micro_wake_word" CONF_WAKE_WORD = "wake_word" CONF_CONVERSATION_TIMEOUT = "conversation_timeout" @@ -96,11 +97,12 @@ CONFIG_SCHEMA = cv.All( min_channels=1, max_channels=1, ), - cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker), cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id( media_player.MediaPlayer ), + cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker), cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean, + cv.Optional(CONF_MICRO_WAKE_WORD): cv.use_id(micro_wake_word.MicroWakeWord), cv.Optional(CONF_VAD_THRESHOLD): cv.invalid( "VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware." ), @@ -191,14 +193,18 @@ async def to_code(config): mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE]) cg.add(var.set_microphone_source(mic_source)) - if CONF_SPEAKER in config: - spkr = await cg.get_variable(config[CONF_SPEAKER]) - cg.add(var.set_speaker(spkr)) + if CONF_MICRO_WAKE_WORD in config: + mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD]) + cg.add(var.set_micro_wake_word(mww)) if CONF_MEDIA_PLAYER in config: mp = await cg.get_variable(config[CONF_MEDIA_PLAYER]) cg.add(var.set_media_player(mp)) + if CONF_SPEAKER in config: + spkr = await cg.get_variable(config[CONF_SPEAKER]) + cg.add(var.set_speaker(spkr)) + cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD])) if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None: diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 37b97239c8..d35717ef91 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -869,6 +869,59 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) #endif } +void VoiceAssistant::on_set_configuration(const std::vector &active_wake_words) { +#ifdef USE_MICRO_WAKE_WORD + if (this->micro_wake_word_) { + // Disable all wake words first + for (auto &model : this->micro_wake_word_->get_wake_words()) { + model->disable(); + } + + // Enable only active wake words + for (auto ww_id : active_wake_words) { + for (auto &model : this->micro_wake_word_->get_wake_words()) { + if (model->get_id() == ww_id) { + model->enable(); + ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str()); + } + } + } + } +#endif +}; + +const Configuration &VoiceAssistant::get_configuration() { + this->config_.available_wake_words.clear(); + this->config_.active_wake_words.clear(); + +#ifdef USE_MICRO_WAKE_WORD + if (this->micro_wake_word_) { + this->config_.max_active_wake_words = 1; + + for (auto &model : this->micro_wake_word_->get_wake_words()) { + if (model->is_enabled()) { + this->config_.active_wake_words.push_back(model->get_id()); + } + + WakeWord wake_word; + wake_word.id = model->get_id(); + wake_word.wake_word = model->get_wake_word(); + for (const auto &lang : model->get_trained_languages()) { + wake_word.trained_languages.push_back(lang); + } + this->config_.available_wake_words.push_back(std::move(wake_word)); + } + } else { +#endif + // No microWakeWord + this->config_.max_active_wake_words = 0; +#ifdef USE_MICRO_WAKE_WORD + } +#endif + + return this->config_; +}; + VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) } // namespace voice_assistant diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index 7122d69527..865731522f 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -12,12 +12,15 @@ #include "esphome/components/api/api_connection.h" #include "esphome/components/api/api_pb2.h" #include "esphome/components/microphone/microphone_source.h" -#ifdef USE_SPEAKER -#include "esphome/components/speaker/speaker.h" -#endif #ifdef USE_MEDIA_PLAYER #include "esphome/components/media_player/media_player.h" #endif +#ifdef USE_MICRO_WAKE_WORD +#include "esphome/components/micro_wake_word/micro_wake_word.h" +#endif +#ifdef USE_SPEAKER +#include "esphome/components/speaker/speaker.h" +#endif #include "esphome/components/socket/socket.h" #include @@ -99,6 +102,9 @@ class VoiceAssistant : public Component { void failed_to_start(); void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; } +#ifdef USE_MICRO_WAKE_WORD + void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; } +#endif #ifdef USE_SPEAKER void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; @@ -152,8 +158,8 @@ class VoiceAssistant : public Component { void on_audio(const api::VoiceAssistantAudio &msg); void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg); void on_announce(const api::VoiceAssistantAnnounceRequest &msg); - void on_set_configuration(const std::vector &active_wake_words){}; - const Configuration &get_configuration() { return this->config_; }; + void on_set_configuration(const std::vector &active_wake_words); + const Configuration &get_configuration(); bool is_running() const { return this->state_ != State::IDLE; } void set_continuous(bool continuous) { this->continuous_ = continuous; } @@ -295,6 +301,10 @@ class VoiceAssistant : public Component { bool start_udp_socket_(); Configuration config_{}; + +#ifdef USE_MICRO_WAKE_WORD + micro_wake_word::MicroWakeWord *micro_wake_word_{nullptr}; +#endif }; template class StartAction : public Action, public Parented { diff --git a/tests/components/voice_assistant/common-idf.yaml b/tests/components/voice_assistant/common-idf.yaml new file mode 100644 index 0000000000..b1d249d5b4 --- /dev/null +++ b/tests/components/voice_assistant/common-idf.yaml @@ -0,0 +1,69 @@ +esphome: + on_boot: + then: + - voice_assistant.start + - voice_assistant.start_continuous + - voice_assistant.stop + +wifi: + ssid: MySSID + password: password1 + +api: + +i2s_audio: + i2s_lrclk_pin: ${i2s_lrclk_pin} + i2s_bclk_pin: ${i2s_bclk_pin} + i2s_mclk_pin: ${i2s_mclk_pin} + +micro_wake_word: + id: mww_id + on_wake_word_detected: + - voice_assistant.start: + wake_word: !lambda return wake_word; + models: + - model: okay_nabu + +microphone: + - platform: i2s_audio + id: mic_id_external + i2s_din_pin: ${i2s_din_pin} + adc_type: external + pdm: false + +speaker: + - platform: i2s_audio + id: speaker_id + dac_type: external + i2s_dout_pin: ${i2s_dout_pin} + +voice_assistant: + microphone: + microphone: mic_id_external + gain_factor: 4 + channels: 0 + speaker: speaker_id + micro_wake_word: mww_id + conversation_timeout: 60s + on_listening: + - logger.log: "Voice assistant microphone listening" + on_start: + - logger.log: "Voice assistant started" + on_stt_end: + - logger.log: + format: "Voice assistant STT ended with result %s" + args: [x.c_str()] + on_tts_start: + - logger.log: + format: "Voice assistant TTS started with text %s" + args: [x.c_str()] + on_tts_end: + - logger.log: + format: "Voice assistant TTS ended with url %s" + args: [x.c_str()] + on_end: + - logger.log: "Voice assistant ended" + on_error: + - logger.log: + format: "Voice assistant error - code %s, message: %s" + args: [code.c_str(), message.c_str()] diff --git a/tests/components/voice_assistant/test.esp32-c3-idf.yaml b/tests/components/voice_assistant/test.esp32-c3-idf.yaml index f596d927cb..46745e4308 100644 --- a/tests/components/voice_assistant/test.esp32-c3-idf.yaml +++ b/tests/components/voice_assistant/test.esp32-c3-idf.yaml @@ -5,4 +5,4 @@ substitutions: i2s_din_pin: GPIO3 i2s_dout_pin: GPIO2 -<<: !include common.yaml +<<: !include common-idf.yaml diff --git a/tests/components/voice_assistant/test.esp32-idf.yaml b/tests/components/voice_assistant/test.esp32-idf.yaml index f6e553f9dc..0fe5d347be 100644 --- a/tests/components/voice_assistant/test.esp32-idf.yaml +++ b/tests/components/voice_assistant/test.esp32-idf.yaml @@ -5,4 +5,4 @@ substitutions: i2s_din_pin: GPIO13 i2s_dout_pin: GPIO12 -<<: !include common.yaml +<<: !include common-idf.yaml