mirror of
https://github.com/esphome/esphome.git
synced 2025-07-28 14:16:40 +00:00
[voice_assistant] voice assistant can configure enabled wake words (#8657)
This commit is contained in:
parent
e215fafebe
commit
c7f597bc75
@ -1,7 +1,7 @@
|
||||
from esphome import automation
|
||||
from esphome.automation import register_action, register_condition
|
||||
import esphome.codegen as cg
|
||||
from esphome.components import media_player, microphone, speaker
|
||||
from esphome.components import media_player, micro_wake_word, microphone, speaker
|
||||
import esphome.config_validation as cv
|
||||
from esphome.const import (
|
||||
CONF_ID,
|
||||
@ -41,6 +41,7 @@ CONF_AUTO_GAIN = "auto_gain"
|
||||
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
|
||||
CONF_VOLUME_MULTIPLIER = "volume_multiplier"
|
||||
|
||||
CONF_MICRO_WAKE_WORD = "micro_wake_word"
|
||||
CONF_WAKE_WORD = "wake_word"
|
||||
|
||||
CONF_CONVERSATION_TIMEOUT = "conversation_timeout"
|
||||
@ -96,11 +97,12 @@ CONFIG_SCHEMA = cv.All(
|
||||
min_channels=1,
|
||||
max_channels=1,
|
||||
),
|
||||
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
|
||||
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
|
||||
media_player.MediaPlayer
|
||||
),
|
||||
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
|
||||
cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean,
|
||||
cv.Optional(CONF_MICRO_WAKE_WORD): cv.use_id(micro_wake_word.MicroWakeWord),
|
||||
cv.Optional(CONF_VAD_THRESHOLD): cv.invalid(
|
||||
"VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware."
|
||||
),
|
||||
@ -191,14 +193,18 @@ async def to_code(config):
|
||||
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
|
||||
cg.add(var.set_microphone_source(mic_source))
|
||||
|
||||
if CONF_SPEAKER in config:
|
||||
spkr = await cg.get_variable(config[CONF_SPEAKER])
|
||||
cg.add(var.set_speaker(spkr))
|
||||
if CONF_MICRO_WAKE_WORD in config:
|
||||
mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD])
|
||||
cg.add(var.set_micro_wake_word(mww))
|
||||
|
||||
if CONF_MEDIA_PLAYER in config:
|
||||
mp = await cg.get_variable(config[CONF_MEDIA_PLAYER])
|
||||
cg.add(var.set_media_player(mp))
|
||||
|
||||
if CONF_SPEAKER in config:
|
||||
spkr = await cg.get_variable(config[CONF_SPEAKER])
|
||||
cg.add(var.set_speaker(spkr))
|
||||
|
||||
cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD]))
|
||||
|
||||
if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None:
|
||||
|
@ -869,6 +869,59 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Apply the set of wake words that should be active.
///
/// All models reported by the attached micro_wake_word component are disabled first; afterwards
/// every model whose id equals an entry of @p active_wake_words is enabled and logged at debug
/// level. Requested ids that match no model are silently ignored.
///
/// The function is a no-op when USE_MICRO_WAKE_WORD is not defined or when no micro_wake_word
/// component has been attached (micro_wake_word_ is null).
///
/// @param active_wake_words Ids of the wake word models that should end up enabled.
void VoiceAssistant::on_set_configuration(const std::vector<std::string> &active_wake_words) {
#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_) {
    // Fetch the model list once; it does not change between the passes below, so there is no
    // need to ask the component again for every requested id. `auto &&` binds to either a
    // returned reference or a returned temporary (whose lifetime is then extended).
    auto &&models = this->micro_wake_word_->get_wake_words();

    // Disable all wake words first
    for (auto &model : models) {
      model->disable();
    }

    // Enable only active wake words. Bind each id by const reference to avoid copying the
    // string on every iteration.
    for (const auto &ww_id : active_wake_words) {
      for (auto &model : models) {
        if (model->get_id() == ww_id) {
          model->enable();
          ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str());
        }
      }
    }
  }
#endif
}
|
||||
|
||||
/// Rebuild the cached configuration and return a reference to it.
///
/// Both wake word lists in config_ are cleared on every call. When a micro_wake_word component
/// is attached (and USE_MICRO_WAKE_WORD is defined), max_active_wake_words is set to 1, every
/// model is appended to available_wake_words (id, wake word phrase and trained languages), and
/// the ids of currently enabled models are appended to active_wake_words. Otherwise
/// max_active_wake_words is 0 and both lists stay empty.
///
/// @return Reference to the member config_, refreshed by this call.
const Configuration &VoiceAssistant::get_configuration() {
  auto &cfg = this->config_;

  cfg.available_wake_words.clear();
  cfg.active_wake_words.clear();

  // No microWakeWord: this default is overridden below when a component is attached.
  cfg.max_active_wake_words = 0;

#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_ != nullptr) {
    cfg.max_active_wake_words = 1;

    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      // Record the id of every model that is currently enabled.
      if (model->is_enabled()) {
        cfg.active_wake_words.push_back(model->get_id());
      }

      // Describe the model regardless of whether it is enabled.
      WakeWord entry;
      entry.id = model->get_id();
      entry.wake_word = model->get_wake_word();
      for (const auto &language : model->get_trained_languages()) {
        entry.trained_languages.push_back(language);
      }
      cfg.available_wake_words.push_back(std::move(entry));
    }
  }
#endif

  return cfg;
}
|
||||
|
||||
VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
|
||||
} // namespace voice_assistant
|
||||
|
@ -12,12 +12,15 @@
|
||||
#include "esphome/components/api/api_connection.h"
|
||||
#include "esphome/components/api/api_pb2.h"
|
||||
#include "esphome/components/microphone/microphone_source.h"
|
||||
#ifdef USE_SPEAKER
|
||||
#include "esphome/components/speaker/speaker.h"
|
||||
#endif
|
||||
#ifdef USE_MEDIA_PLAYER
|
||||
#include "esphome/components/media_player/media_player.h"
|
||||
#endif
|
||||
#ifdef USE_MICRO_WAKE_WORD
|
||||
#include "esphome/components/micro_wake_word/micro_wake_word.h"
|
||||
#endif
|
||||
#ifdef USE_SPEAKER
|
||||
#include "esphome/components/speaker/speaker.h"
|
||||
#endif
|
||||
#include "esphome/components/socket/socket.h"
|
||||
|
||||
#include <unordered_map>
|
||||
@ -99,6 +102,9 @@ class VoiceAssistant : public Component {
|
||||
void failed_to_start();
|
||||
|
||||
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
|
||||
#ifdef USE_MICRO_WAKE_WORD
|
||||
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; }
|
||||
#endif
|
||||
#ifdef USE_SPEAKER
|
||||
void set_speaker(speaker::Speaker *speaker) {
|
||||
this->speaker_ = speaker;
|
||||
@ -152,8 +158,8 @@ class VoiceAssistant : public Component {
|
||||
void on_audio(const api::VoiceAssistantAudio &msg);
|
||||
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg);
|
||||
void on_announce(const api::VoiceAssistantAnnounceRequest &msg);
|
||||
void on_set_configuration(const std::vector<std::string> &active_wake_words){};
|
||||
const Configuration &get_configuration() { return this->config_; };
|
||||
void on_set_configuration(const std::vector<std::string> &active_wake_words);
|
||||
const Configuration &get_configuration();
|
||||
|
||||
bool is_running() const { return this->state_ != State::IDLE; }
|
||||
void set_continuous(bool continuous) { this->continuous_ = continuous; }
|
||||
@ -295,6 +301,10 @@ class VoiceAssistant : public Component {
|
||||
bool start_udp_socket_();
|
||||
|
||||
Configuration config_{};
|
||||
|
||||
#ifdef USE_MICRO_WAKE_WORD
|
||||
micro_wake_word::MicroWakeWord *micro_wake_word_{nullptr};
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
|
||||
|
69
tests/components/voice_assistant/common-idf.yaml
Normal file
69
tests/components/voice_assistant/common-idf.yaml
Normal file
@ -0,0 +1,69 @@
|
||||
esphome:
|
||||
on_boot:
|
||||
then:
|
||||
- voice_assistant.start
|
||||
- voice_assistant.start_continuous
|
||||
- voice_assistant.stop
|
||||
|
||||
wifi:
|
||||
ssid: MySSID
|
||||
password: password1
|
||||
|
||||
api:
|
||||
|
||||
i2s_audio:
|
||||
i2s_lrclk_pin: ${i2s_lrclk_pin}
|
||||
i2s_bclk_pin: ${i2s_bclk_pin}
|
||||
i2s_mclk_pin: ${i2s_mclk_pin}
|
||||
|
||||
micro_wake_word:
|
||||
id: mww_id
|
||||
on_wake_word_detected:
|
||||
- voice_assistant.start:
|
||||
wake_word: !lambda return wake_word;
|
||||
models:
|
||||
- model: okay_nabu
|
||||
|
||||
microphone:
|
||||
- platform: i2s_audio
|
||||
id: mic_id_external
|
||||
i2s_din_pin: ${i2s_din_pin}
|
||||
adc_type: external
|
||||
pdm: false
|
||||
|
||||
speaker:
|
||||
- platform: i2s_audio
|
||||
id: speaker_id
|
||||
dac_type: external
|
||||
i2s_dout_pin: ${i2s_dout_pin}
|
||||
|
||||
voice_assistant:
|
||||
microphone:
|
||||
microphone: mic_id_external
|
||||
gain_factor: 4
|
||||
channels: 0
|
||||
speaker: speaker_id
|
||||
micro_wake_word: mww_id
|
||||
conversation_timeout: 60s
|
||||
on_listening:
|
||||
- logger.log: "Voice assistant microphone listening"
|
||||
on_start:
|
||||
- logger.log: "Voice assistant started"
|
||||
on_stt_end:
|
||||
- logger.log:
|
||||
format: "Voice assistant STT ended with result %s"
|
||||
args: [x.c_str()]
|
||||
on_tts_start:
|
||||
- logger.log:
|
||||
format: "Voice assistant TTS started with text %s"
|
||||
args: [x.c_str()]
|
||||
on_tts_end:
|
||||
- logger.log:
|
||||
format: "Voice assistant TTS ended with url %s"
|
||||
args: [x.c_str()]
|
||||
on_end:
|
||||
- logger.log: "Voice assistant ended"
|
||||
on_error:
|
||||
- logger.log:
|
||||
format: "Voice assistant error - code %s, message: %s"
|
||||
args: [code.c_str(), message.c_str()]
|
@ -5,4 +5,4 @@ substitutions:
|
||||
i2s_din_pin: GPIO3
|
||||
i2s_dout_pin: GPIO2
|
||||
|
||||
<<: !include common.yaml
|
||||
<<: !include common-idf.yaml
|
||||
|
@ -5,4 +5,4 @@ substitutions:
|
||||
i2s_din_pin: GPIO13
|
||||
i2s_dout_pin: GPIO12
|
||||
|
||||
<<: !include common.yaml
|
||||
<<: !include common-idf.yaml
|
||||
|
Loading…
x
Reference in New Issue
Block a user