mirror of
https://github.com/esphome/esphome.git
synced 2025-07-28 14:16:40 +00:00
[micro_wake_word] Use microphone callback and avoid unnecessary allocation attempts (#8626)
This commit is contained in:
parent
e557bca420
commit
ee646d7324
@ -61,6 +61,29 @@ void MicroWakeWord::dump_config() {
|
|||||||
void MicroWakeWord::setup() {
|
void MicroWakeWord::setup() {
|
||||||
ESP_LOGCONFIG(TAG, "Setting up microWakeWord...");
|
ESP_LOGCONFIG(TAG, "Setting up microWakeWord...");
|
||||||
|
|
||||||
|
this->microphone_->add_data_callback([this](const std::vector<int16_t> &data) {
|
||||||
|
if (this->state_ != State::DETECTING_WAKE_WORD) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_;
|
||||||
|
if (this->ring_buffer_.use_count() == 2) {
|
||||||
|
// mWW still owns the ring buffer and temp_ring_buffer does as well, proceed to copy audio into ring buffer
|
||||||
|
|
||||||
|
size_t bytes_free = temp_ring_buffer->free();
|
||||||
|
|
||||||
|
if (bytes_free < data.size() * sizeof(int16_t)) {
|
||||||
|
ESP_LOGW(
|
||||||
|
TAG,
|
||||||
|
"Not enough free bytes in ring buffer to store incoming audio data (free bytes=%d, incoming bytes=%d). "
|
||||||
|
"Resetting the ring buffer. Wake word detection accuracy will be reduced.",
|
||||||
|
bytes_free, data.size());
|
||||||
|
|
||||||
|
temp_ring_buffer->reset();
|
||||||
|
}
|
||||||
|
temp_ring_buffer->write((void *) data.data(), data.size() * sizeof(int16_t));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
if (!this->register_streaming_ops_(this->streaming_op_resolver_)) {
|
if (!this->register_streaming_ops_(this->streaming_op_resolver_)) {
|
||||||
this->mark_failed();
|
this->mark_failed();
|
||||||
return;
|
return;
|
||||||
@ -107,7 +130,6 @@ void MicroWakeWord::loop() {
|
|||||||
ESP_LOGD(TAG, "Starting Microphone");
|
ESP_LOGD(TAG, "Starting Microphone");
|
||||||
this->microphone_->start();
|
this->microphone_->start();
|
||||||
this->set_state_(State::STARTING_MICROPHONE);
|
this->set_state_(State::STARTING_MICROPHONE);
|
||||||
this->high_freq_.start();
|
|
||||||
break;
|
break;
|
||||||
case State::STARTING_MICROPHONE:
|
case State::STARTING_MICROPHONE:
|
||||||
if (this->microphone_->is_running()) {
|
if (this->microphone_->is_running()) {
|
||||||
@ -115,21 +137,19 @@ void MicroWakeWord::loop() {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case State::DETECTING_WAKE_WORD:
|
case State::DETECTING_WAKE_WORD:
|
||||||
while (!this->has_enough_samples_()) {
|
while (this->has_enough_samples_()) {
|
||||||
this->read_microphone_();
|
this->update_model_probabilities_();
|
||||||
}
|
if (this->detect_wake_words_()) {
|
||||||
this->update_model_probabilities_();
|
ESP_LOGD(TAG, "Wake Word '%s' Detected", (this->detected_wake_word_).c_str());
|
||||||
if (this->detect_wake_words_()) {
|
this->detected_ = true;
|
||||||
ESP_LOGD(TAG, "Wake Word '%s' Detected", (this->detected_wake_word_).c_str());
|
this->set_state_(State::STOP_MICROPHONE);
|
||||||
this->detected_ = true;
|
}
|
||||||
this->set_state_(State::STOP_MICROPHONE);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case State::STOP_MICROPHONE:
|
case State::STOP_MICROPHONE:
|
||||||
ESP_LOGD(TAG, "Stopping Microphone");
|
ESP_LOGD(TAG, "Stopping Microphone");
|
||||||
this->microphone_->stop();
|
this->microphone_->stop();
|
||||||
this->set_state_(State::STOPPING_MICROPHONE);
|
this->set_state_(State::STOPPING_MICROPHONE);
|
||||||
this->high_freq_.stop();
|
|
||||||
this->unload_models_();
|
this->unload_models_();
|
||||||
this->deallocate_buffers_();
|
this->deallocate_buffers_();
|
||||||
break;
|
break;
|
||||||
@ -157,6 +177,11 @@ void MicroWakeWord::start() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this->state_ != State::IDLE) {
|
||||||
|
ESP_LOGW(TAG, "Wake word is already running");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!this->load_models_() || !this->allocate_buffers_()) {
|
if (!this->load_models_() || !this->allocate_buffers_()) {
|
||||||
ESP_LOGE(TAG, "Failed to load the wake word model(s) or allocate buffers");
|
ESP_LOGE(TAG, "Failed to load the wake word model(s) or allocate buffers");
|
||||||
this->status_set_error();
|
this->status_set_error();
|
||||||
@ -169,11 +194,6 @@ void MicroWakeWord::start() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->state_ != State::IDLE) {
|
|
||||||
ESP_LOGW(TAG, "Wake word is already running");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->reset_states_();
|
this->reset_states_();
|
||||||
this->set_state_(State::START_MICROPHONE);
|
this->set_state_(State::START_MICROPHONE);
|
||||||
}
|
}
|
||||||
@ -196,26 +216,6 @@ void MicroWakeWord::set_state_(State state) {
|
|||||||
this->state_ = state;
|
this->state_ = state;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MicroWakeWord::read_microphone_() {
|
|
||||||
size_t bytes_read = this->microphone_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
|
|
||||||
if (bytes_read == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t bytes_free = this->ring_buffer_->free();
|
|
||||||
|
|
||||||
if (bytes_free < bytes_read) {
|
|
||||||
ESP_LOGW(TAG,
|
|
||||||
"Not enough free bytes in ring buffer to store incoming audio data (free bytes=%d, incoming bytes=%d). "
|
|
||||||
"Resetting the ring buffer. Wake word detection accuracy will be reduced.",
|
|
||||||
bytes_free, bytes_read);
|
|
||||||
|
|
||||||
this->ring_buffer_->reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
return this->ring_buffer_->write((void *) this->input_buffer_, bytes_read);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MicroWakeWord::allocate_buffers_() {
|
bool MicroWakeWord::allocate_buffers_() {
|
||||||
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
||||||
|
|
||||||
@ -235,9 +235,9 @@ bool MicroWakeWord::allocate_buffers_() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->ring_buffer_ == nullptr) {
|
if (this->ring_buffer_.use_count() == 0) {
|
||||||
this->ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t));
|
this->ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t));
|
||||||
if (this->ring_buffer_ == nullptr) {
|
if (this->ring_buffer_.use_count() == 0) {
|
||||||
ESP_LOGE(TAG, "Could not allocate ring buffer");
|
ESP_LOGE(TAG, "Could not allocate ring buffer");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -248,10 +248,17 @@ bool MicroWakeWord::allocate_buffers_() {
|
|||||||
|
|
||||||
void MicroWakeWord::deallocate_buffers_() {
|
void MicroWakeWord::deallocate_buffers_() {
|
||||||
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
||||||
audio_samples_allocator.deallocate(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
|
if (this->input_buffer_ != nullptr) {
|
||||||
this->input_buffer_ = nullptr;
|
audio_samples_allocator.deallocate(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
|
||||||
audio_samples_allocator.deallocate(this->preprocessor_audio_buffer_, this->new_samples_to_get_());
|
this->input_buffer_ = nullptr;
|
||||||
this->preprocessor_audio_buffer_ = nullptr;
|
}
|
||||||
|
|
||||||
|
if (this->preprocessor_audio_buffer_ != nullptr) {
|
||||||
|
audio_samples_allocator.deallocate(this->preprocessor_audio_buffer_, this->new_samples_to_get_());
|
||||||
|
this->preprocessor_audio_buffer_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->ring_buffer_.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MicroWakeWord::load_models_() {
|
bool MicroWakeWord::load_models_() {
|
||||||
|
@ -62,9 +62,8 @@ class MicroWakeWord : public Component {
|
|||||||
microphone::Microphone *microphone_{nullptr};
|
microphone::Microphone *microphone_{nullptr};
|
||||||
Trigger<std::string> *wake_word_detected_trigger_ = new Trigger<std::string>();
|
Trigger<std::string> *wake_word_detected_trigger_ = new Trigger<std::string>();
|
||||||
State state_{State::IDLE};
|
State state_{State::IDLE};
|
||||||
HighFrequencyLoopRequester high_freq_;
|
|
||||||
|
|
||||||
std::unique_ptr<RingBuffer> ring_buffer_;
|
std::shared_ptr<RingBuffer> ring_buffer_;
|
||||||
|
|
||||||
std::vector<WakeWordModel> wake_word_models_;
|
std::vector<WakeWordModel> wake_word_models_;
|
||||||
|
|
||||||
@ -98,15 +97,6 @@ class MicroWakeWord : public Component {
|
|||||||
/// @return True if enough samples, false otherwise.
|
/// @return True if enough samples, false otherwise.
|
||||||
bool has_enough_samples_();
|
bool has_enough_samples_();
|
||||||
|
|
||||||
/** Reads audio from microphone into the ring buffer
|
|
||||||
*
|
|
||||||
* Audio data (16000 kHz with int16 samples) is read into the input_buffer_.
|
|
||||||
* Verifies the ring buffer has enough space for all audio data. If not, it logs
|
|
||||||
* a warning and resets the ring buffer entirely.
|
|
||||||
* @return Number of bytes written to the ring buffer
|
|
||||||
*/
|
|
||||||
size_t read_microphone_();
|
|
||||||
|
|
||||||
/// @brief Allocates memory for input_buffer_, preprocessor_audio_buffer_, and ring_buffer_
|
/// @brief Allocates memory for input_buffer_, preprocessor_audio_buffer_, and ring_buffer_
|
||||||
/// @return True if successful, false otherwise
|
/// @return True if successful, false otherwise
|
||||||
bool allocate_buffers_();
|
bool allocate_buffers_();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user