mirror of
https://github.com/esphome/esphome.git
synced 2025-07-28 14:16:40 +00:00
[voice_assistant] voice assistant can configure enabled wake words (#8657)
This commit is contained in:
parent
e215fafebe
commit
c7f597bc75
@ -1,7 +1,7 @@
|
|||||||
from esphome import automation
|
from esphome import automation
|
||||||
from esphome.automation import register_action, register_condition
|
from esphome.automation import register_action, register_condition
|
||||||
import esphome.codegen as cg
|
import esphome.codegen as cg
|
||||||
from esphome.components import media_player, microphone, speaker
|
from esphome.components import media_player, micro_wake_word, microphone, speaker
|
||||||
import esphome.config_validation as cv
|
import esphome.config_validation as cv
|
||||||
from esphome.const import (
|
from esphome.const import (
|
||||||
CONF_ID,
|
CONF_ID,
|
||||||
@ -41,6 +41,7 @@ CONF_AUTO_GAIN = "auto_gain"
|
|||||||
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
|
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
|
||||||
CONF_VOLUME_MULTIPLIER = "volume_multiplier"
|
CONF_VOLUME_MULTIPLIER = "volume_multiplier"
|
||||||
|
|
||||||
|
CONF_MICRO_WAKE_WORD = "micro_wake_word"
|
||||||
CONF_WAKE_WORD = "wake_word"
|
CONF_WAKE_WORD = "wake_word"
|
||||||
|
|
||||||
CONF_CONVERSATION_TIMEOUT = "conversation_timeout"
|
CONF_CONVERSATION_TIMEOUT = "conversation_timeout"
|
||||||
@ -96,11 +97,12 @@ CONFIG_SCHEMA = cv.All(
|
|||||||
min_channels=1,
|
min_channels=1,
|
||||||
max_channels=1,
|
max_channels=1,
|
||||||
),
|
),
|
||||||
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
|
|
||||||
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
|
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
|
||||||
media_player.MediaPlayer
|
media_player.MediaPlayer
|
||||||
),
|
),
|
||||||
|
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
|
||||||
cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean,
|
cv.Optional(CONF_USE_WAKE_WORD, default=False): cv.boolean,
|
||||||
|
cv.Optional(CONF_MICRO_WAKE_WORD): cv.use_id(micro_wake_word.MicroWakeWord),
|
||||||
cv.Optional(CONF_VAD_THRESHOLD): cv.invalid(
|
cv.Optional(CONF_VAD_THRESHOLD): cv.invalid(
|
||||||
"VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware."
|
"VAD threshold is no longer supported, as it requires the deprecated esp_adf external component. Use an i2s_audio microphone/speaker instead. Additionally, you may need to configure the audio_adc and audio_dac components depending on your hardware."
|
||||||
),
|
),
|
||||||
@ -191,14 +193,18 @@ async def to_code(config):
|
|||||||
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
|
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
|
||||||
cg.add(var.set_microphone_source(mic_source))
|
cg.add(var.set_microphone_source(mic_source))
|
||||||
|
|
||||||
if CONF_SPEAKER in config:
|
if CONF_MICRO_WAKE_WORD in config:
|
||||||
spkr = await cg.get_variable(config[CONF_SPEAKER])
|
mww = await cg.get_variable(config[CONF_MICRO_WAKE_WORD])
|
||||||
cg.add(var.set_speaker(spkr))
|
cg.add(var.set_micro_wake_word(mww))
|
||||||
|
|
||||||
if CONF_MEDIA_PLAYER in config:
|
if CONF_MEDIA_PLAYER in config:
|
||||||
mp = await cg.get_variable(config[CONF_MEDIA_PLAYER])
|
mp = await cg.get_variable(config[CONF_MEDIA_PLAYER])
|
||||||
cg.add(var.set_media_player(mp))
|
cg.add(var.set_media_player(mp))
|
||||||
|
|
||||||
|
if CONF_SPEAKER in config:
|
||||||
|
spkr = await cg.get_variable(config[CONF_SPEAKER])
|
||||||
|
cg.add(var.set_speaker(spkr))
|
||||||
|
|
||||||
cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD]))
|
cg.add(var.set_use_wake_word(config[CONF_USE_WAKE_WORD]))
|
||||||
|
|
||||||
if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None:
|
if (vad_threshold := config.get(CONF_VAD_THRESHOLD)) is not None:
|
||||||
|
@ -869,6 +869,59 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Apply the requested set of active wake words.
///
/// Every model reported by the attached micro_wake_word component is disabled first; afterwards
/// each model whose id appears in @p active_wake_words is enabled again and logged. Ids that do
/// not match any model are silently ignored. The function is a no-op when the firmware is built
/// without USE_MICRO_WAKE_WORD or when no micro_wake_word component has been attached.
///
/// @param active_wake_words ids of the wake word models that should end up enabled.
void VoiceAssistant::on_set_configuration(const std::vector<std::string> &active_wake_words) {
#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_) {
    // Disable all wake words first
    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      model->disable();
    }

    // Enable only active wake words.
    // The id is bound by const reference: iterating by value copied every std::string.
    for (const auto &ww_id : active_wake_words) {
      for (auto &model : this->micro_wake_word_->get_wake_words()) {
        if (model->get_id() == ww_id) {
          model->enable();
          ESP_LOGD(TAG, "Enabled wake word: %s (id=%s)", model->get_wake_word().c_str(), model->get_id().c_str());
        }
      }
    }
  }
#endif
}
|
||||||
|
|
||||||
|
/// Rebuild the cached Configuration and return a reference to it.
///
/// Both wake word lists are cleared and repopulated on every call. When a micro_wake_word
/// component is attached (and USE_MICRO_WAKE_WORD is defined), every model it reports is listed
/// as available, enabled models are additionally listed as active, and max_active_wake_words is
/// set to 1. Otherwise the lists stay empty and max_active_wake_words is 0.
///
/// @return reference to the member config_, refreshed by this call.
const Configuration &VoiceAssistant::get_configuration() {
  auto &cfg = this->config_;
  cfg.available_wake_words.clear();
  cfg.active_wake_words.clear();

  // Default for the "no microWakeWord" case; overridden below when one is attached.
  cfg.max_active_wake_words = 0;

#ifdef USE_MICRO_WAKE_WORD
  if (this->micro_wake_word_ != nullptr) {
    cfg.max_active_wake_words = 1;

    for (auto &model : this->micro_wake_word_->get_wake_words()) {
      if (model->is_enabled()) {
        cfg.active_wake_words.push_back(model->get_id());
      }

      // Describe the model regardless of whether it is currently enabled.
      WakeWord entry;
      entry.id = model->get_id();
      entry.wake_word = model->get_wake_word();
      for (const auto &language : model->get_trained_languages()) {
        entry.trained_languages.push_back(language);
      }
      cfg.available_wake_words.push_back(std::move(entry));
    }
  }
#endif

  return cfg;
}
|
||||||
|
|
||||||
VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
|
||||||
|
|
||||||
} // namespace voice_assistant
|
} // namespace voice_assistant
|
||||||
|
@ -12,12 +12,15 @@
|
|||||||
#include "esphome/components/api/api_connection.h"
|
#include "esphome/components/api/api_connection.h"
|
||||||
#include "esphome/components/api/api_pb2.h"
|
#include "esphome/components/api/api_pb2.h"
|
||||||
#include "esphome/components/microphone/microphone_source.h"
|
#include "esphome/components/microphone/microphone_source.h"
|
||||||
#ifdef USE_SPEAKER
|
|
||||||
#include "esphome/components/speaker/speaker.h"
|
|
||||||
#endif
|
|
||||||
#ifdef USE_MEDIA_PLAYER
|
#ifdef USE_MEDIA_PLAYER
|
||||||
#include "esphome/components/media_player/media_player.h"
|
#include "esphome/components/media_player/media_player.h"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef USE_MICRO_WAKE_WORD
|
||||||
|
#include "esphome/components/micro_wake_word/micro_wake_word.h"
|
||||||
|
#endif
|
||||||
|
#ifdef USE_SPEAKER
|
||||||
|
#include "esphome/components/speaker/speaker.h"
|
||||||
|
#endif
|
||||||
#include "esphome/components/socket/socket.h"
|
#include "esphome/components/socket/socket.h"
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -99,6 +102,9 @@ class VoiceAssistant : public Component {
|
|||||||
void failed_to_start();
|
void failed_to_start();
|
||||||
|
|
||||||
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
|
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
|
||||||
|
#ifdef USE_MICRO_WAKE_WORD
|
||||||
|
/// Store the given MicroWakeWord pointer in micro_wake_word_ (plain pointer assignment, no other work).
void set_micro_wake_word(micro_wake_word::MicroWakeWord *mww) { this->micro_wake_word_ = mww; }
|
||||||
|
#endif
|
||||||
#ifdef USE_SPEAKER
|
#ifdef USE_SPEAKER
|
||||||
void set_speaker(speaker::Speaker *speaker) {
|
void set_speaker(speaker::Speaker *speaker) {
|
||||||
this->speaker_ = speaker;
|
this->speaker_ = speaker;
|
||||||
@ -152,8 +158,8 @@ class VoiceAssistant : public Component {
|
|||||||
void on_audio(const api::VoiceAssistantAudio &msg);
|
void on_audio(const api::VoiceAssistantAudio &msg);
|
||||||
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg);
|
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg);
|
||||||
void on_announce(const api::VoiceAssistantAnnounceRequest &msg);
|
void on_announce(const api::VoiceAssistantAnnounceRequest &msg);
|
||||||
void on_set_configuration(const std::vector<std::string> &active_wake_words){};
|
void on_set_configuration(const std::vector<std::string> &active_wake_words);
|
||||||
const Configuration &get_configuration() { return this->config_; };
|
const Configuration &get_configuration();
|
||||||
|
|
||||||
bool is_running() const { return this->state_ != State::IDLE; }
|
bool is_running() const { return this->state_ != State::IDLE; }
|
||||||
void set_continuous(bool continuous) { this->continuous_ = continuous; }
|
void set_continuous(bool continuous) { this->continuous_ = continuous; }
|
||||||
@ -295,6 +301,10 @@ class VoiceAssistant : public Component {
|
|||||||
bool start_udp_socket_();
|
bool start_udp_socket_();
|
||||||
|
|
||||||
Configuration config_{};
|
Configuration config_{};
|
||||||
|
|
||||||
|
#ifdef USE_MICRO_WAKE_WORD
|
||||||
|
micro_wake_word::MicroWakeWord *micro_wake_word_{nullptr};
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
|
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
|
||||||
|
69
tests/components/voice_assistant/common-idf.yaml
Normal file
69
tests/components/voice_assistant/common-idf.yaml
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
esphome:
|
||||||
|
on_boot:
|
||||||
|
then:
|
||||||
|
- voice_assistant.start
|
||||||
|
- voice_assistant.start_continuous
|
||||||
|
- voice_assistant.stop
|
||||||
|
|
||||||
|
wifi:
|
||||||
|
ssid: MySSID
|
||||||
|
password: password1
|
||||||
|
|
||||||
|
api:
|
||||||
|
|
||||||
|
i2s_audio:
|
||||||
|
i2s_lrclk_pin: ${i2s_lrclk_pin}
|
||||||
|
i2s_bclk_pin: ${i2s_bclk_pin}
|
||||||
|
i2s_mclk_pin: ${i2s_mclk_pin}
|
||||||
|
|
||||||
|
micro_wake_word:
|
||||||
|
id: mww_id
|
||||||
|
on_wake_word_detected:
|
||||||
|
- voice_assistant.start:
|
||||||
|
wake_word: !lambda return wake_word;
|
||||||
|
models:
|
||||||
|
- model: okay_nabu
|
||||||
|
|
||||||
|
microphone:
|
||||||
|
- platform: i2s_audio
|
||||||
|
id: mic_id_external
|
||||||
|
i2s_din_pin: ${i2s_din_pin}
|
||||||
|
adc_type: external
|
||||||
|
pdm: false
|
||||||
|
|
||||||
|
speaker:
|
||||||
|
- platform: i2s_audio
|
||||||
|
id: speaker_id
|
||||||
|
dac_type: external
|
||||||
|
i2s_dout_pin: ${i2s_dout_pin}
|
||||||
|
|
||||||
|
voice_assistant:
|
||||||
|
microphone:
|
||||||
|
microphone: mic_id_external
|
||||||
|
gain_factor: 4
|
||||||
|
channels: 0
|
||||||
|
speaker: speaker_id
|
||||||
|
micro_wake_word: mww_id
|
||||||
|
conversation_timeout: 60s
|
||||||
|
on_listening:
|
||||||
|
- logger.log: "Voice assistant microphone listening"
|
||||||
|
on_start:
|
||||||
|
- logger.log: "Voice assistant started"
|
||||||
|
on_stt_end:
|
||||||
|
- logger.log:
|
||||||
|
format: "Voice assistant STT ended with result %s"
|
||||||
|
args: [x.c_str()]
|
||||||
|
on_tts_start:
|
||||||
|
- logger.log:
|
||||||
|
format: "Voice assistant TTS started with text %s"
|
||||||
|
args: [x.c_str()]
|
||||||
|
on_tts_end:
|
||||||
|
- logger.log:
|
||||||
|
format: "Voice assistant TTS ended with url %s"
|
||||||
|
args: [x.c_str()]
|
||||||
|
on_end:
|
||||||
|
- logger.log: "Voice assistant ended"
|
||||||
|
on_error:
|
||||||
|
- logger.log:
|
||||||
|
format: "Voice assistant error - code %s, message: %s"
|
||||||
|
args: [code.c_str(), message.c_str()]
|
@ -5,4 +5,4 @@ substitutions:
|
|||||||
i2s_din_pin: GPIO3
|
i2s_din_pin: GPIO3
|
||||||
i2s_dout_pin: GPIO2
|
i2s_dout_pin: GPIO2
|
||||||
|
|
||||||
<<: !include common.yaml
|
<<: !include common-idf.yaml
|
||||||
|
@ -5,4 +5,4 @@ substitutions:
|
|||||||
i2s_din_pin: GPIO13
|
i2s_din_pin: GPIO13
|
||||||
i2s_dout_pin: GPIO12
|
i2s_dout_pin: GPIO12
|
||||||
|
|
||||||
<<: !include common.yaml
|
<<: !include common-idf.yaml
|
||||||
|
Loading…
x
Reference in New Issue
Block a user