[micro_wake_word] Use microphone callback and avoid unnecessary allocation attempts (#8626)

This commit is contained in:
Kevin Ahrendt 2025-04-27 18:23:25 -05:00 committed by GitHub
parent e557bca420
commit ee646d7324
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 49 additions and 52 deletions

View File

@ -61,6 +61,29 @@ void MicroWakeWord::dump_config() {
void MicroWakeWord::setup() {
ESP_LOGCONFIG(TAG, "Setting up microWakeWord...");
// Microphone data callback: copies incoming samples into the ring buffer while detection is active.
this->microphone_->add_data_callback([this](const std::vector<int16_t> &data) {
  // Audio is only buffered in the detecting state; everything else is dropped.
  if (this->state_ != State::DETECTING_WAKE_WORD) {
    return;
  }
  // Hold a temporary owning reference so the ring buffer stays alive for the rest of this callback.
  // NOTE(review): presumably this callback can run concurrently with deallocate_buffers_() - confirm.
  std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_;
  if (this->ring_buffer_.use_count() == 2) {
    // Exactly two owners: the component's member and temp_ring_buffer, so the buffer is allocated and safe to use.
    // All ring buffer accounting is in bytes, so convert the sample count once and reuse it for the
    // comparison, the log message, and the write.
    const size_t incoming_bytes = data.size() * sizeof(int16_t);
    const size_t bytes_free = temp_ring_buffer->free();
    if (bytes_free < incoming_bytes) {
      ESP_LOGW(
          TAG,
          "Not enough free bytes in ring buffer to store incoming audio data (free bytes=%zu, incoming bytes=%zu). "
          "Resetting the ring buffer. Wake word detection accuracy will be reduced.",
          bytes_free, incoming_bytes);
      // Discard the buffered audio to make room for the new chunk.
      temp_ring_buffer->reset();
    }
    temp_ring_buffer->write((void *) data.data(), incoming_bytes);
  }
});
if (!this->register_streaming_ops_(this->streaming_op_resolver_)) {
this->mark_failed();
return;
@ -107,7 +130,6 @@ void MicroWakeWord::loop() {
ESP_LOGD(TAG, "Starting Microphone");
this->microphone_->start();
this->set_state_(State::STARTING_MICROPHONE);
this->high_freq_.start();
break;
case State::STARTING_MICROPHONE:
if (this->microphone_->is_running()) {
@ -115,21 +137,19 @@ void MicroWakeWord::loop() {
}
break;
case State::DETECTING_WAKE_WORD:
while (!this->has_enough_samples_()) {
this->read_microphone_();
}
while (this->has_enough_samples_()) {
this->update_model_probabilities_();
if (this->detect_wake_words_()) {
ESP_LOGD(TAG, "Wake Word '%s' Detected", (this->detected_wake_word_).c_str());
this->detected_ = true;
this->set_state_(State::STOP_MICROPHONE);
}
}
break;
case State::STOP_MICROPHONE:
ESP_LOGD(TAG, "Stopping Microphone");
this->microphone_->stop();
this->set_state_(State::STOPPING_MICROPHONE);
this->high_freq_.stop();
this->unload_models_();
this->deallocate_buffers_();
break;
@ -157,6 +177,11 @@ void MicroWakeWord::start() {
return;
}
if (this->state_ != State::IDLE) {
ESP_LOGW(TAG, "Wake word is already running");
return;
}
if (!this->load_models_() || !this->allocate_buffers_()) {
ESP_LOGE(TAG, "Failed to load the wake word model(s) or allocate buffers");
this->status_set_error();
@ -169,11 +194,6 @@ void MicroWakeWord::start() {
return;
}
if (this->state_ != State::IDLE) {
ESP_LOGW(TAG, "Wake word is already running");
return;
}
this->reset_states_();
this->set_state_(State::START_MICROPHONE);
}
@ -196,26 +216,6 @@ void MicroWakeWord::set_state_(State state) {
this->state_ = state;
}
/// @brief Reads audio from the microphone into input_buffer_ and copies it into the ring buffer.
///
/// If the ring buffer has less free space than the number of bytes read, a warning is logged and the
/// ring buffer is reset before the new data is written.
/// @return Number of bytes written to the ring buffer; 0 if the microphone returned no data.
size_t MicroWakeWord::read_microphone_() {
  const size_t bytes_read = this->microphone_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
  if (bytes_read == 0) {
    return 0;
  }

  const size_t bytes_free = this->ring_buffer_->free();
  if (bytes_free < bytes_read) {
    // Both values are size_t, so they are formatted with %zu rather than %d.
    ESP_LOGW(TAG,
             "Not enough free bytes in ring buffer to store incoming audio data (free bytes=%zu, incoming bytes=%zu). "
             "Resetting the ring buffer. Wake word detection accuracy will be reduced.",
             bytes_free, bytes_read);
    // Discard the buffered audio to make room for the new chunk.
    this->ring_buffer_->reset();
  }

  return this->ring_buffer_->write(static_cast<void *>(this->input_buffer_), bytes_read);
}
bool MicroWakeWord::allocate_buffers_() {
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
@ -235,9 +235,9 @@ bool MicroWakeWord::allocate_buffers_() {
}
}
if (this->ring_buffer_ == nullptr) {
if (this->ring_buffer_.use_count() == 0) {
this->ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t));
if (this->ring_buffer_ == nullptr) {
if (this->ring_buffer_.use_count() == 0) {
ESP_LOGE(TAG, "Could not allocate ring buffer");
return false;
}
@ -248,12 +248,19 @@ bool MicroWakeWord::allocate_buffers_() {
void MicroWakeWord::deallocate_buffers_() {
ExternalRAMAllocator<int16_t> audio_samples_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
if (this->input_buffer_ != nullptr) {
audio_samples_allocator.deallocate(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
this->input_buffer_ = nullptr;
}
if (this->preprocessor_audio_buffer_ != nullptr) {
audio_samples_allocator.deallocate(this->preprocessor_audio_buffer_, this->new_samples_to_get_());
this->preprocessor_audio_buffer_ = nullptr;
}
this->ring_buffer_.reset();
}
bool MicroWakeWord::load_models_() {
// Setup preprocessor feature generator
if (!FrontendPopulateState(&this->frontend_config_, &this->frontend_state_, AUDIO_SAMPLE_FREQUENCY)) {

View File

@ -62,9 +62,8 @@ class MicroWakeWord : public Component {
microphone::Microphone *microphone_{nullptr};
Trigger<std::string> *wake_word_detected_trigger_ = new Trigger<std::string>();
State state_{State::IDLE};
HighFrequencyLoopRequester high_freq_;
std::unique_ptr<RingBuffer> ring_buffer_;
std::shared_ptr<RingBuffer> ring_buffer_;
std::vector<WakeWordModel> wake_word_models_;
@ -98,15 +97,6 @@ class MicroWakeWord : public Component {
/// @return True if enough samples, false otherwise.
bool has_enough_samples_();
/** Reads audio from microphone into the ring buffer
*
* Audio data (16 kHz with int16 samples) is read into the input_buffer_.
* Verifies the ring buffer has enough space for all audio data. If not, it logs
* a warning and resets the ring buffer entirely.
* @return Number of bytes written to the ring buffer
*/
size_t read_microphone_();
/// @brief Allocates memory for input_buffer_, preprocessor_audio_buffer_, and ring_buffer_
/// @return True if successful, false otherwise
bool allocate_buffers_();