[voice_assistant] voice assistant can configure enabled wake words (#8657)

This commit is contained in:
Kevin Ahrendt 2025-05-01 06:11:09 -05:00 committed by GitHub
parent e215fafebe
commit c7f597bc75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 150 additions and 12 deletions

View File

@ -1,7 +1,7 @@
from esphome import automation from esphome import automation
from esphome.automation import register_action, register_condition from esphome.automation import register_action, register_condition
import esphome.codegen as cg import esphome.codegen as cg
from esphome.components import media_player, microphone, speaker from esphome.components import media_player, micro_wake_word, microphone, speaker
import esphome.config_validation as cv import esphome.config_validation as cv
from esphome.const import ( from esphome.const import (
CONF_ID, CONF_ID,
@ -41,6 +41,7 @@ CONF_AUTO_GAIN = "auto_gain"
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
CONF_VOLUME_MULTIPLIER = "volume_multiplier" CONF_VOLUME_MULTIPLIER = "volume_multiplier"
CONF_MICRO_WAKE_WORD = "micro_wake_word"
CONF_WAKE_WORD = "wake_word" CONF_WAKE_WORD = "wake_word"
CONF_CONVERSATION_TIMEOUT = "conversation_timeout" CONF_CONVERSATION_TIMEOUT = "conversation_timeout"
@ -96,11 +97,12 @@ CONFIG_SCHEMA = cv.All(
min_channels=1, min_channels=1,
max_channels=1, max_channels=1,
), ),
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id( cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
media_player.MediaPlayer media_player.MediaPlayer
), ),
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean, cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean,
cv.Optional(CONF_MICRO_WAKE_WORD): cv.use_id(micro_wake_word.MicroWakeWord),
cv.Optional(CONF_VAD_THRESHOLD): cv.invalid( cv.Optional(CONF_VAD_THRESHOLD): cv.invalid(
"VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware." "VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware."
), ),
@ -191,14 +193,18 @@ async def to_code(config):
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE]) mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
cg.add(var.set_microphone_source(mic_source)) cg.add(var.set_microphone_source(mic_source))
if CONF_SPEAKER in config: if CONF_MICRO_WAKE_WORD in config:
spkr = await cg.get_variable(config[CONF_SPEAKER]) mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD])
cg.add(var.set_speaker(spkr)) cg.add(var.set_micro_wake_word(mww))
if CONF_MEDIA_PLAYER in config: if CONF_MEDIA_PLAYER in config:
mp = await cg.get_variable(config[CONF_MEDIA_PLAYER]) mp = await cg.get_variable(config[CONF_MEDIA_PLAYER])
cg.add(var.set_media_player(mp)) cg.add(var.set_media_player(mp))
if CONF_SPEAKER in config:
spkr = await cg.get_variable(config[CONF_SPEAKER])
cg.add(var.set_speaker(spkr))
cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD])) cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD]))
if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None: if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None:

View File

@ -869,6 +869,59 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
#endif #endif
} }
// Enables exactly the wake word models whose ids are listed in `active_wake_words`.
//
// Every model reported by the linked micro_wake_word component is disabled first; afterwards each model
// whose id equals one of the requested ids is enabled (and logged). Requested ids that match no model
// are ignored. The call is a no-op when USE_MICRO_WAKE_WORD is not defined or when no micro_wake_word
// component has been set.
void VoiceAssistant::on_set_configuration(const std::vector<std::string> &active_wake_words) {
#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_ == nullptr) {
    return;
  }

  // Fetch the model list once instead of once per requested id. `auto &&` binds correctly whether
  // get_wake_words() hands back a reference or a temporary (whose lifetime is then extended).
  auto &&models = this->micro_wake_word_->get_wake_words();

  // Disable all wake words first
  for (auto &model : models) {
    model->disable();
  }

  // Enable only active wake words; iterate the ids by const reference so no string is copied
  for (const auto &ww_id : active_wake_words) {
    for (auto &model : models) {
      if (model->get_id() == ww_id) {
        model->enable();
        ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str());
      }
    }
  }
#endif
}
// Rebuilds the cached configuration and returns a reference to it.
//
// Both wake word lists are cleared and refilled on every call. With a micro_wake_word component set,
// every model it reports is listed as available (id, wake word phrase, trained languages), enabled
// models are additionally listed as active, and at most one active wake word is advertised. Otherwise
// the lists stay empty and zero active wake words are advertised.
const Configuration &VoiceAssistant::get_configuration() {
  Configuration &cfg = this->config_;

  cfg.available_wake_words.clear();
  cfg.active_wake_words.clear();
  // No microWakeWord: default that is overwritten below when a component is linked
  cfg.max_active_wake_words = 0;

#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_ != nullptr) {
    cfg.max_active_wake_words = 1;

    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      if (model->is_enabled()) {
        cfg.active_wake_words.push_back(model->get_id());
      }

      WakeWord entry;
      entry.id = model->get_id();
      entry.wake_word = model->get_wake_word();
      for (const auto &language : model->get_trained_languages()) {
        entry.trained_languages.push_back(language);
      }
      cfg.available_wake_words.push_back(std::move(entry));
    }
  }
#endif

  return cfg;
}
VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
} // namespace voice_assistant } // namespace voice_assistant

View File

@ -12,12 +12,15 @@
#include "esphome/components/api/api_connection.h" #include "esphome/components/api/api_connection.h"
#include "esphome/components/api/api_pb2.h" #include "esphome/components/api/api_pb2.h"
#include "esphome/components/microphone/microphone_source.h" #include "esphome/components/microphone/microphone_source.h"
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
#ifdef USE_MEDIA_PLAYER #ifdef USE_MEDIA_PLAYER
#include "esphome/components/media_player/media_player.h" #include "esphome/components/media_player/media_player.h"
#endif #endif
#ifdef USE_MICRO_WAKE_WORD
#include "esphome/components/micro_wake_word/micro_wake_word.h"
#endif
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
#include "esphome/components/socket/socket.h" #include "esphome/components/socket/socket.h"
#include <unordered_map> #include <unordered_map>
@ -99,6 +102,9 @@ class VoiceAssistant : public Component {
void failed_to_start(); void failed_to_start();
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; } void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
#ifdef USE_MICRO_WAKE_WORD
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; }
#endif
#ifdef USE_SPEAKER #ifdef USE_SPEAKER
void set_speaker(speaker::Speaker *speaker) { void set_speaker(speaker::Speaker *speaker) {
this->speaker_ = speaker; this->speaker_ = speaker;
@ -152,8 +158,8 @@ class VoiceAssistant : public Component {
void on_audio(const api::VoiceAssistantAudio &msg); void on_audio(const api::VoiceAssistantAudio &msg);
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg); void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg);
void on_announce(const api::VoiceAssistantAnnounceRequest &msg); void on_announce(const api::VoiceAssistantAnnounceRequest &msg);
void on_set_configuration(const std::vector<std::string> &active_wake_words){}; void on_set_configuration(const std::vector<std::string> &active_wake_words);
const Configuration &get_configuration() { return this->config_; }; const Configuration &get_configuration();
bool is_running() const { return this->state_ != State::IDLE; } bool is_running() const { return this->state_ != State::IDLE; }
void set_continuous(bool continuous) { this->continuous_ = continuous; } void set_continuous(bool continuous) { this->continuous_ = continuous; }
@ -295,6 +301,10 @@ class VoiceAssistant : public Component {
bool start_udp_socket_(); bool start_udp_socket_();
Configuration config_{}; Configuration config_{};
#ifdef USE_MICRO_WAKE_WORD
micro_wake_word::MicroWakeWord *micro_wake_word_{nullptr};
#endif
}; };
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> { template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {

View File

@ -0,0 +1,69 @@
# Configuration that uses voice_assistant together with a micro_wake_word component.
# The ${...} pin values are substitutions; presumably they are supplied by the file that includes this one.
esphome:
on_boot:
then:
# Run each voice_assistant action once at boot.
- voice_assistant.start
- voice_assistant.start_continuous
- voice_assistant.stop
wifi:
ssid: MySSID
password: password1
api:
i2s_audio:
i2s_lrclk_pin: ${i2s_lrclk_pin}
i2s_bclk_pin: ${i2s_bclk_pin}
i2s_mclk_pin: ${i2s_mclk_pin}
# Wake word engine; voice_assistant below refers to it through the id mww_id.
micro_wake_word:
id: mww_id
on_wake_word_detected:
# Start the voice assistant, passing along the wake_word value provided to this trigger.
- voice_assistant.start:
wake_word: !lambda return wake_word;
models:
- model: okay_nabu
microphone:
- platform: i2s_audio
id: mic_id_external
i2s_din_pin: ${i2s_din_pin}
adc_type: external
pdm: false
speaker:
- platform: i2s_audio
id: speaker_id
dac_type: external
i2s_dout_pin: ${i2s_dout_pin}
voice_assistant:
microphone:
microphone: mic_id_external
gain_factor: 4
channels: 0
speaker: speaker_id
# Link to the micro_wake_word component declared above.
micro_wake_word: mww_id
conversation_timeout: 60s
# Each trigger below only logs a message.
on_listening:
- logger.log: "Voice assistant microphone listening"
on_start:
- logger.log: "Voice assistant started"
on_stt_end:
- logger.log:
format: "Voice assistant STT ended with result %s"
args: [x.c_str()]
on_tts_start:
- logger.log:
format: "Voice assistant TTS started with text %s"
args: [x.c_str()]
on_tts_end:
- logger.log:
format: "Voice assistant TTS ended with url %s"
args: [x.c_str()]
on_end:
- logger.log: "Voice assistant ended"
on_error:
- logger.log:
format: "Voice assistant error - code %s, message: %s"
args: [code.c_str(), message.c_str()]

View File

@ -5,4 +5,4 @@ substitutions:
i2s_din_pin: GPIO3 i2s_din_pin: GPIO3
i2s_dout_pin: GPIO2 i2s_dout_pin: GPIO2
<<: !include common.yaml <<: !include common-idf.yaml

View File

@ -5,4 +5,4 @@ substitutions:
i2s_din_pin: GPIO13 i2s_din_pin: GPIO13
i2s_dout_pin: GPIO12 i2s_dout_pin: GPIO12
<<: !include common.yaml <<: !include common-idf.yaml