[i2s_audio] Speaker improvements: CPU core agnostic and more accurate timestamps (#9800)

Co-authored-by: NP v/d Spek <github_mail@lumensoft.nl>
This commit is contained in:
Kevin Ahrendt 2025-07-24 04:14:00 +01:00 committed by GitHub
parent 108e447072
commit 6398bb2fdf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 302 additions and 307 deletions

View File

@ -1,6 +1,6 @@
from esphome import pins
import esphome.codegen as cg
from esphome.components.esp32 import get_esp32_variant
from esphome.components.esp32 import add_idf_sdkconfig_option, get_esp32_variant
from esphome.components.esp32.const import (
VARIANT_ESP32,
VARIANT_ESP32C3,
@ -258,6 +258,10 @@ async def to_code(config):
if use_legacy():
cg.add_define("USE_I2S_LEGACY")
# Helps avoid callbacks being skipped due to processor load
if CORE.using_esp_idf:
add_idf_sdkconfig_option("CONFIG_I2S_ISR_IRAM_SAFE", True)
cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN]))
if CONF_I2S_BCLK_PIN in config:
cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN]))

View File

@ -9,6 +9,7 @@
#endif
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/core/application.h"
#include "esphome/core/hal.h"
@ -19,72 +20,33 @@
namespace esphome {
namespace i2s_audio {
static const uint8_t DMA_BUFFER_DURATION_MS = 15;
static const uint32_t DMA_BUFFER_DURATION_MS = 15;
static const size_t DMA_BUFFERS_COUNT = 4;
static const size_t TASK_DELAY_MS = DMA_BUFFER_DURATION_MS * DMA_BUFFERS_COUNT / 2;
static const size_t TASK_STACK_SIZE = 4096;
static const ssize_t TASK_PRIORITY = 23;
static const ssize_t TASK_PRIORITY = 19;
static const size_t I2S_EVENT_QUEUE_COUNT = DMA_BUFFERS_COUNT + 1;
static const char *const TAG = "i2s_audio.speaker";
enum SpeakerEventGroupBits : uint32_t {
COMMAND_START = (1 << 0), // starts the speaker task
COMMAND_START = (1 << 0), // indicates loop should start speaker task
COMMAND_STOP = (1 << 1), // stops the speaker task
COMMAND_STOP_GRACEFULLY = (1 << 2), // Stops the speaker task once all data has been written
STATE_STARTING = (1 << 10),
STATE_RUNNING = (1 << 11),
STATE_STOPPING = (1 << 12),
STATE_STOPPED = (1 << 13),
ERR_TASK_FAILED_TO_START = (1 << 14),
ERR_ESP_INVALID_STATE = (1 << 15),
ERR_ESP_NOT_SUPPORTED = (1 << 16),
ERR_ESP_INVALID_ARG = (1 << 17),
ERR_ESP_INVALID_SIZE = (1 << 18),
TASK_STARTING = (1 << 10),
TASK_RUNNING = (1 << 11),
TASK_STOPPING = (1 << 12),
TASK_STOPPED = (1 << 13),
ERR_ESP_NO_MEM = (1 << 19),
ERR_ESP_FAIL = (1 << 20),
ALL_ERR_ESP_BITS = ERR_ESP_INVALID_STATE | ERR_ESP_NOT_SUPPORTED | ERR_ESP_INVALID_ARG | ERR_ESP_INVALID_SIZE |
ERR_ESP_NO_MEM | ERR_ESP_FAIL,
WARN_DROPPED_EVENT = (1 << 20),
ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
};
// Translates a single SpeakerEventGroupBits ERR_ESP_* bit to the corresponding esp_err_t.
// Any value that is not exactly one of the known error bits maps to ESP_FAIL.
static esp_err_t err_bit_to_esp_err(uint32_t bit) {
  if (bit == SpeakerEventGroupBits::ERR_ESP_INVALID_STATE) {
    return ESP_ERR_INVALID_STATE;
  }
  if (bit == SpeakerEventGroupBits::ERR_ESP_INVALID_ARG) {
    return ESP_ERR_INVALID_ARG;
  }
  if (bit == SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE) {
    return ESP_ERR_INVALID_SIZE;
  }
  if (bit == SpeakerEventGroupBits::ERR_ESP_NO_MEM) {
    return ESP_ERR_NO_MEM;
  }
  if (bit == SpeakerEventGroupBits::ERR_ESP_NOT_SUPPORTED) {
    return ESP_ERR_NOT_SUPPORTED;
  }
  // Unknown bit, or several bits set at once
  return ESP_FAIL;
}
/// @brief Multiplies the input array of Q15 numbers by a Q15 constant factor
///
/// Based on `dsps_mulc_s16_ansi` from the esp-dsp library:
/// https://github.com/espressif/esp-dsp/blob/master/modules/math/mulc/fixed/dsps_mulc_s16_ansi.c
/// (accessed on 2024-09-30).
///
/// Each element is read before its result is stored, so `input` and `output` may point to the same array for
/// in-place scaling. The product is truncated (arithmetic shift right by 15), not rounded or saturated.
/// @param input Array of Q15 numbers
/// @param output Array of Q15 numbers
/// @param len Length of array
/// @param c Q15 constant factor
static void q15_multiplication(const int16_t *input, int16_t *output, size_t len, int16_t c) {
  // Index with size_t so the comparison against len is not a signed/unsigned mix and cannot overflow an int
  for (size_t i = 0; i < len; ++i) {
    const int32_t acc = static_cast<int32_t>(input[i]) * static_cast<int32_t>(c);  // Q15 * Q15 -> Q30
    output[i] = static_cast<int16_t>(acc >> 15);                                   // Q30 -> Q15
  }
}
// Lists the Q15 fixed point scaling factor for volume reduction.
// Has 100 values representing silence and a reduction [49, 48.5, ... 0.5, 0] dB.
// dB to PCM scaling factor formula: floating_point_scale_factor = 2^(-db/6.014)
@ -132,51 +94,80 @@ void I2SAudioSpeaker::dump_config() {
void I2SAudioSpeaker::loop() {
uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
if (event_group_bits & SpeakerEventGroupBits::STATE_STARTING) {
ESP_LOGD(TAG, "Starting");
if ((event_group_bits & SpeakerEventGroupBits::COMMAND_START) && (this->state_ == speaker::STATE_STOPPED)) {
this->state_ = speaker::STATE_STARTING;
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_STARTING);
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START);
}
if (event_group_bits & SpeakerEventGroupBits::STATE_RUNNING) {
// Handle the task's state
if (event_group_bits & SpeakerEventGroupBits::TASK_STARTING) {
ESP_LOGD(TAG, "Starting");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::TASK_STARTING);
}
if (event_group_bits & SpeakerEventGroupBits::TASK_RUNNING) {
ESP_LOGD(TAG, "Started");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::TASK_RUNNING);
this->state_ = speaker::STATE_RUNNING;
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_RUNNING);
this->status_clear_warning();
this->status_clear_error();
}
if (event_group_bits & SpeakerEventGroupBits::STATE_STOPPING) {
if (event_group_bits & SpeakerEventGroupBits::TASK_STOPPING) {
ESP_LOGD(TAG, "Stopping");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPING);
this->state_ = speaker::STATE_STOPPING;
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_STOPPING);
}
if (event_group_bits & SpeakerEventGroupBits::STATE_STOPPED) {
if (!this->task_created_) {
ESP_LOGD(TAG, "Stopped");
this->state_ = speaker::STATE_STOPPED;
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ALL_BITS);
this->speaker_task_handle_ = nullptr;
}
if (event_group_bits & SpeakerEventGroupBits::TASK_STOPPED) {
ESP_LOGD(TAG, "Stopped");
vTaskDelete(this->speaker_task_handle_);
this->speaker_task_handle_ = nullptr;
this->stop_i2s_driver_();
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ALL_BITS);
this->status_clear_error();
this->state_ = speaker::STATE_STOPPED;
}
if (event_group_bits & SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START) {
this->status_set_error("Failed to start task");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START);
// Log any errors encountered by the task
if (event_group_bits & SpeakerEventGroupBits::ERR_ESP_NO_MEM) {
ESP_LOGE(TAG, "Not enough memory");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM);
}
if (event_group_bits & SpeakerEventGroupBits::ALL_ERR_ESP_BITS) {
uint32_t error_bits = event_group_bits & SpeakerEventGroupBits::ALL_ERR_ESP_BITS;
ESP_LOGW(TAG, "Writing failed: %s", esp_err_to_name(err_bit_to_esp_err(error_bits)));
this->status_set_warning();
// Warn if any playback timestamp events are dropped, which drastically reduces synced playback accuracy
if (event_group_bits & SpeakerEventGroupBits::WARN_DROPPED_EVENT) {
ESP_LOGW(TAG, "Event dropped, synchronized playback accuracy is reduced");
xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::WARN_DROPPED_EVENT);
}
if (event_group_bits & SpeakerEventGroupBits::ERR_ESP_NOT_SUPPORTED) {
this->status_set_error("Failed to adjust bus to match incoming audio");
ESP_LOGE(TAG, "Incompatible audio format: sample rate = %" PRIu32 ", channels = %u, bits per sample = %u",
this->audio_stream_info_.get_sample_rate(), this->audio_stream_info_.get_channels(),
this->audio_stream_info_.get_bits_per_sample());
}
// Handle the speaker's state
switch (this->state_) {
case speaker::STATE_STARTING:
if (this->status_has_error()) {
break;
}
xEventGroupClearBits(this->event_group_, ALL_ERR_ESP_BITS);
if (this->start_i2s_driver_(this->audio_stream_info_) != ESP_OK) {
ESP_LOGE(TAG, "Driver failed to start; retrying in 1 second");
this->status_momentary_error("driver-faiure", 1000);
break;
}
if (this->speaker_task_handle_ == nullptr) {
xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
&this->speaker_task_handle_);
if (this->speaker_task_handle_ == nullptr) {
ESP_LOGE(TAG, "Task failed to start, retrying in 1 second");
this->status_momentary_error("task-failure", 1000);
this->stop_i2s_driver_(); // Stops the driver to return the lock; will be reloaded in next attempt
}
}
break;
case speaker::STATE_RUNNING: // Intentional fallthrough
case speaker::STATE_STOPPING: // Intentional fallthrough
case speaker::STATE_STOPPED:
break;
}
}
void I2SAudioSpeaker::set_volume(float volume) {
@ -227,83 +218,76 @@ size_t I2SAudioSpeaker::play(const uint8_t *data, size_t length, TickType_t tick
this->start();
}
if ((this->state_ != speaker::STATE_RUNNING) || (this->audio_ring_buffer_.use_count() != 1)) {
if (this->state_ != speaker::STATE_RUNNING) {
// Unable to write data to a running speaker, so delay the max amount of time so it can get ready
vTaskDelay(ticks_to_wait);
ticks_to_wait = 0;
}
size_t bytes_written = 0;
if ((this->state_ == speaker::STATE_RUNNING) && (this->audio_ring_buffer_.use_count() == 1)) {
// Only one owner of the ring buffer (the speaker task), so the ring buffer is allocated and no other components are
// attempting to write to it.
// Temporarily share ownership of the ring buffer so it won't be deallocated while writing
std::shared_ptr<RingBuffer> temp_ring_buffer = this->audio_ring_buffer_;
bytes_written = temp_ring_buffer->write_without_replacement((void *) data, length, ticks_to_wait);
if (this->state_ == speaker::STATE_RUNNING) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->audio_ring_buffer_.lock();
if (temp_ring_buffer.use_count() == 2) {
// Only the speaker task and this temp_ring_buffer own the ring buffer, so it's safe to write to
bytes_written = temp_ring_buffer->write_without_replacement((void *) data, length, ticks_to_wait);
}
}
return bytes_written;
}
bool I2SAudioSpeaker::has_buffered_data() const {
if (this->audio_ring_buffer_ != nullptr) {
return this->audio_ring_buffer_->available() > 0;
if (this->audio_ring_buffer_.use_count() > 0) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->audio_ring_buffer_.lock();
return temp_ring_buffer->available() > 0;
}
return false;
}
void I2SAudioSpeaker::speaker_task(void *params) {
I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params;
this_speaker->task_created_ = true;
uint32_t event_group_bits =
xEventGroupWaitBits(this_speaker->event_group_,
SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP |
SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY, // Bit message to read
pdTRUE, // Clear the bits on exit
pdFALSE, // Don't wait for all the bits,
portMAX_DELAY); // Block indefinitely until a bit is set
if (event_group_bits & (SpeakerEventGroupBits::COMMAND_STOP | SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY)) {
// Received a stop signal before the task was requested to start
this_speaker->delete_task_(0);
}
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_STARTING);
audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_;
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::TASK_STARTING);
const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * DMA_BUFFERS_COUNT;
// Ensure ring buffer duration is at least the duration of all DMA buffers
const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this_speaker->buffer_duration_ms_);
// The DMA buffers may have more bits per sample, so calculate buffer sizes based in the input audio stream info
const size_t data_buffer_size = audio_stream_info.ms_to_bytes(dma_buffers_duration_ms);
const size_t ring_buffer_size = audio_stream_info.ms_to_bytes(ring_buffer_duration);
const size_t ring_buffer_size = this_speaker->current_stream_info_.ms_to_bytes(ring_buffer_duration);
const size_t single_dma_buffer_input_size = data_buffer_size / DMA_BUFFERS_COUNT;
const uint32_t frames_to_fill_single_dma_buffer =
this_speaker->current_stream_info_.ms_to_frames(DMA_BUFFER_DURATION_MS);
const size_t bytes_to_fill_single_dma_buffer =
this_speaker->current_stream_info_.frames_to_bytes(frames_to_fill_single_dma_buffer);
if (this_speaker->send_esp_err_to_event_group_(this_speaker->allocate_buffers_(data_buffer_size, ring_buffer_size))) {
// Failed to allocate buffers
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM);
this_speaker->delete_task_(data_buffer_size);
bool successful_setup = false;
std::unique_ptr<audio::AudioSourceTransferBuffer> transfer_buffer =
audio::AudioSourceTransferBuffer::create(bytes_to_fill_single_dma_buffer);
if (transfer_buffer != nullptr) {
std::shared_ptr<RingBuffer> temp_ring_buffer = RingBuffer::create(ring_buffer_size);
if (temp_ring_buffer.use_count() == 1) {
transfer_buffer->set_source(temp_ring_buffer);
this_speaker->audio_ring_buffer_ = temp_ring_buffer;
successful_setup = true;
}
}
if (!this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_(audio_stream_info))) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_RUNNING);
if (!successful_setup) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM);
} else {
bool stop_gracefully = false;
bool tx_dma_underflow = true;
uint32_t frames_written = 0;
uint32_t last_data_received_time = millis();
bool tx_dma_underflow = false;
this_speaker->accumulated_frames_written_ = 0;
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::TASK_RUNNING);
// Keep looping if paused, there is no timeout configured, or data was received more recently than the configured
// timeout
while (this_speaker->pause_state_ || !this_speaker->timeout_.has_value() ||
(millis() - last_data_received_time) <= this_speaker->timeout_.value()) {
event_group_bits = xEventGroupGetBits(this_speaker->event_group_);
uint32_t event_group_bits = xEventGroupGetBits(this_speaker->event_group_);
if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) {
xEventGroupClearBits(this_speaker->event_group_, SpeakerEventGroupBits::COMMAND_STOP);
@ -314,7 +298,7 @@ void I2SAudioSpeaker::speaker_task(void *params) {
stop_gracefully = true;
}
if (this_speaker->audio_stream_info_ != audio_stream_info) {
if (this_speaker->audio_stream_info_ != this_speaker->current_stream_info_) {
// Audio stream info changed, stop the speaker task so it will restart with the proper settings.
break;
}
@ -326,36 +310,75 @@ void I2SAudioSpeaker::speaker_task(void *params) {
}
}
#else
bool overflow;
while (xQueueReceive(this_speaker->i2s_event_queue_, &overflow, 0)) {
if (overflow) {
int64_t write_timestamp;
while (xQueueReceive(this_speaker->i2s_event_queue_, &write_timestamp, 0)) {
// Receives timing events from the I2S on_sent callback. If actual audio data was sent in this event, it passes
// on the timing info via the audio_output_callback.
uint32_t frames_sent = frames_to_fill_single_dma_buffer;
if (frames_to_fill_single_dma_buffer > frames_written) {
tx_dma_underflow = true;
frames_sent = frames_written;
const uint32_t frames_zeroed = frames_to_fill_single_dma_buffer - frames_written;
write_timestamp -= this_speaker->current_stream_info_.frames_to_microseconds(frames_zeroed);
} else {
tx_dma_underflow = false;
}
frames_written -= frames_sent;
if (frames_sent > 0) {
this_speaker->audio_output_callback_(frames_sent, write_timestamp);
}
}
#endif
if (this_speaker->pause_state_) {
// Pause state is accessed atomically, so thread safe
// Delay so the task can yields, then skip transferring audio data
delay(TASK_DELAY_MS);
// Delay so the task yields, then skip transferring audio data
vTaskDelay(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS));
continue;
}
size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, data_buffer_size,
pdMS_TO_TICKS(TASK_DELAY_MS));
// Wait half the duration of the data already written to the DMA buffers for new audio data
// The millisecond helper modifies the frames_written variable, so use the microsecond helper and divide by 1000
const uint32_t read_delay =
(this_speaker->current_stream_info_.frames_to_microseconds(frames_written) / 1000) / 2;
uint8_t *new_data = transfer_buffer->get_buffer_end(); // track start of any newly copied bytes
size_t bytes_read = transfer_buffer->transfer_data_from_source(pdMS_TO_TICKS(read_delay));
if (bytes_read > 0) {
if ((audio_stream_info.get_bits_per_sample() == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) {
// Scale samples by the volume factor in place
q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_,
bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_);
if (this_speaker->q15_volume_factor_ < INT16_MAX) {
// Apply the software volume adjustment by unpacking the sample into a Q31 fixed-point number, shifting it,
// multiplying by the volume factor, and packing the sample back into the original bytes per sample.
const size_t bytes_per_sample = this_speaker->current_stream_info_.samples_to_bytes(1);
const uint32_t len = bytes_read / bytes_per_sample;
// Use Q16 for samples with 1 or 2 bytes: shifted_sample * gain_factor is Q16 * Q15 -> Q31
int32_t shift = 15; // Q31 -> Q16
int32_t gain_factor = this_speaker->q15_volume_factor_; // Q15
if (bytes_per_sample >= 3) {
// Use Q23 for samples with 3 or 4 bytes: shifted_sample * gain_factor is Q23 * Q8 -> Q31
shift = 8; // Q31 -> Q23
gain_factor >>= 7; // Q15 -> Q8
}
for (uint32_t i = 0; i < len; ++i) {
int32_t sample =
audio::unpack_audio_sample_to_q31(&new_data[i * bytes_per_sample], bytes_per_sample); // Q31
sample >>= shift;
sample *= gain_factor; // Q31
audio::pack_q31_as_audio_sample(sample, &new_data[i * bytes_per_sample], bytes_per_sample);
}
}
#ifdef USE_ESP32_VARIANT_ESP32
// For ESP32 8/16 bit mono mode samples need to be switched.
if (audio_stream_info.get_channels() == 1 && audio_stream_info.get_bits_per_sample() <= 16) {
if (this_speaker->current_stream_info_.get_channels() == 1 &&
this_speaker->current_stream_info_.get_bits_per_sample() <= 16) {
size_t len = bytes_read / sizeof(int16_t);
int16_t *tmp_buf = (int16_t *) this_speaker->data_buffer_;
int16_t *tmp_buf = (int16_t *) new_data;
for (int i = 0; i < len; i += 2) {
int16_t tmp = tmp_buf[i];
tmp_buf[i] = tmp_buf[i + 1];
@ -363,62 +386,87 @@ void I2SAudioSpeaker::speaker_task(void *params) {
}
}
#endif
// Write the audio data to a single DMA buffer at a time to reduce latency for the audio duration played
// callback.
const uint32_t batches = (bytes_read + single_dma_buffer_input_size - 1) / single_dma_buffer_input_size;
}
for (uint32_t i = 0; i < batches; ++i) {
size_t bytes_written = 0;
size_t bytes_to_write = std::min(single_dma_buffer_input_size, bytes_read);
#ifdef USE_I2S_LEGACY
if (audio_stream_info.get_bits_per_sample() == (uint8_t) this_speaker->bits_per_sample_) {
i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_ + i * single_dma_buffer_input_size,
bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
} else if (audio_stream_info.get_bits_per_sample() < (uint8_t) this_speaker->bits_per_sample_) {
i2s_write_expand(this_speaker->parent_->get_port(),
this_speaker->data_buffer_ + i * single_dma_buffer_input_size, bytes_to_write,
audio_stream_info.get_bits_per_sample(), this_speaker->bits_per_sample_, &bytes_written,
pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
}
#else
i2s_channel_write(this_speaker->tx_handle_, this_speaker->data_buffer_ + i * single_dma_buffer_input_size,
bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
#endif
int64_t now = esp_timer_get_time();
if (bytes_written != bytes_to_write) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE);
}
bytes_read -= bytes_written;
this_speaker->audio_output_callback_(audio_stream_info.bytes_to_frames(bytes_written),
now + dma_buffers_duration_ms * 1000);
tx_dma_underflow = false;
last_data_received_time = millis();
}
} else {
// No data received
if (transfer_buffer->available() == 0) {
if (stop_gracefully && tx_dma_underflow) {
break;
}
vTaskDelay(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS / 2));
} else {
size_t bytes_written = 0;
#ifdef USE_I2S_LEGACY
if (this_speaker->current_stream_info_.get_bits_per_sample() == (uint8_t) this_speaker->bits_per_sample_) {
i2s_write(this_speaker->parent_->get_port(), transfer_buffer->get_buffer_start(),
transfer_buffer->available(), &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS));
} else if (this_speaker->current_stream_info_.get_bits_per_sample() <
(uint8_t) this_speaker->bits_per_sample_) {
i2s_write_expand(this_speaker->parent_->get_port(), transfer_buffer->get_buffer_start(),
transfer_buffer->available(), this_speaker->current_stream_info_.get_bits_per_sample(),
this_speaker->bits_per_sample_, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS));
}
#else
if (tx_dma_underflow) {
// Temporarily disable channel and callback to reset the I2S driver's internal DMA buffer queue so timing
// callbacks are accurate. Preload the data.
i2s_channel_disable(this_speaker->tx_handle_);
const i2s_event_callbacks_t callbacks = {
.on_sent = nullptr,
};
i2s_channel_register_event_callback(this_speaker->tx_handle_, &callbacks, this_speaker);
i2s_channel_preload_data(this_speaker->tx_handle_, transfer_buffer->get_buffer_start(),
transfer_buffer->available(), &bytes_written);
} else {
// Audio is already playing, use regular I2S write to add to the DMA buffers
i2s_channel_write(this_speaker->tx_handle_, transfer_buffer->get_buffer_start(), transfer_buffer->available(),
&bytes_written, DMA_BUFFER_DURATION_MS);
}
#endif
if (bytes_written > 0) {
last_data_received_time = millis();
frames_written += this_speaker->current_stream_info_.bytes_to_frames(bytes_written);
transfer_buffer->decrease_buffer_length(bytes_written);
if (tx_dma_underflow) {
tx_dma_underflow = false;
#ifndef USE_I2S_LEGACY
// Reset the event queue timestamps
// Enable the on_sent callback to accurately track the timestamps of played audio
// Enable the I2S channel to start sending the preloaded audio
xQueueReset(this_speaker->i2s_event_queue_);
const i2s_event_callbacks_t callbacks = {
.on_sent = i2s_on_sent_cb,
};
i2s_channel_register_event_callback(this_speaker->tx_handle_, &callbacks, this_speaker);
i2s_channel_enable(this_speaker->tx_handle_);
#endif
}
#ifdef USE_I2S_LEGACY
// The legacy driver doesn't easily support the callback approach for timestamps, so fall back to a direct but
// less accurate approach.
this_speaker->audio_output_callback_(this_speaker->current_stream_info_.bytes_to_frames(bytes_written),
esp_timer_get_time() + dma_buffers_duration_ms * 1000);
#endif
}
}
}
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_STOPPING);
#ifdef USE_I2S_LEGACY
i2s_driver_uninstall(this_speaker->parent_->get_port());
#else
i2s_channel_disable(this_speaker->tx_handle_);
i2s_del_channel(this_speaker->tx_handle_);
#endif
this_speaker->parent_->unlock();
}
this_speaker->delete_task_(data_buffer_size);
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::TASK_STOPPING);
if (transfer_buffer != nullptr) {
transfer_buffer.reset();
}
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::TASK_STOPPED);
while (true) {
// Continuously delay until the loop method deletes the task
vTaskDelay(pdMS_TO_TICKS(10));
}
}
void I2SAudioSpeaker::start() {
@ -427,16 +475,7 @@ void I2SAudioSpeaker::start() {
if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING))
return;
if (!this->task_created_ && (this->speaker_task_handle_ == nullptr)) {
xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
&this->speaker_task_handle_);
if (this->speaker_task_handle_ != nullptr) {
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START);
} else {
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START);
}
}
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START);
}
void I2SAudioSpeaker::stop() { this->stop_(false); }
@ -456,61 +495,16 @@ void I2SAudioSpeaker::stop_(bool wait_on_empty) {
}
}
// Reports an esp_err_t to the main loop by setting the matching ERR_ESP_* bit in event_group_.
// Returns false (and sets nothing) for ESP_OK; returns true after setting a bit for any other code.
// Codes without a dedicated bit are reported as ERR_ESP_FAIL.
bool I2SAudioSpeaker::send_esp_err_to_event_group_(esp_err_t err) {
  if (err == ESP_OK) {
    return false;
  }

  uint32_t error_bit = SpeakerEventGroupBits::ERR_ESP_FAIL;
  if (err == ESP_ERR_INVALID_STATE) {
    error_bit = SpeakerEventGroupBits::ERR_ESP_INVALID_STATE;
  } else if (err == ESP_ERR_INVALID_ARG) {
    error_bit = SpeakerEventGroupBits::ERR_ESP_INVALID_ARG;
  } else if (err == ESP_ERR_INVALID_SIZE) {
    error_bit = SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE;
  } else if (err == ESP_ERR_NO_MEM) {
    error_bit = SpeakerEventGroupBits::ERR_ESP_NO_MEM;
  } else if (err == ESP_ERR_NOT_SUPPORTED) {
    error_bit = SpeakerEventGroupBits::ERR_ESP_NOT_SUPPORTED;
  }

  xEventGroupSetBits(this->event_group_, error_bit);
  return true;
}
// Ensures both the intermediate data buffer and the audio ring buffer exist, allocating whichever is missing.
// Existing buffers are reused. Returns ESP_ERR_NO_MEM if either buffer is unavailable, otherwise ESP_OK.
esp_err_t I2SAudioSpeaker::allocate_buffers_(size_t data_buffer_size, size_t ring_buffer_size) {
  if (this->data_buffer_ == nullptr) {
    // Scratch buffer that holds audio taken from the ring buffer before it is written to the I2S bus
    RAMAllocator<uint8_t> allocator;
    this->data_buffer_ = allocator.allocate(data_buffer_size);
    if (this->data_buffer_ == nullptr) {
      return ESP_ERR_NO_MEM;
    }
  }

  if (this->audio_ring_buffer_.use_count() == 0) {
    // Held through a shared_ptr so it isn't improperly deallocated
    this->audio_ring_buffer_ = RingBuffer::create(ring_buffer_size);
  }

  return (this->audio_ring_buffer_ == nullptr) ? ESP_ERR_NO_MEM : ESP_OK;
}
esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info) {
this->current_stream_info_ = audio_stream_info; // store the stream info settings the driver will use
#ifdef USE_I2S_LEGACY
if ((this->i2s_mode_ & I2S_MODE_SLAVE) && (this->sample_rate_ != audio_stream_info.get_sample_rate())) { // NOLINT
#else
if ((this->i2s_role_ & I2S_ROLE_SLAVE) && (this->sample_rate_ != audio_stream_info.get_sample_rate())) { // NOLINT
#endif
// Can't reconfigure I2S bus, so the sample rate must match the configured value
ESP_LOGE(TAG, "Audio stream settings are not compatible with this I2S configuration");
return ESP_ERR_NOT_SUPPORTED;
}
@ -521,10 +515,12 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
(i2s_slot_bit_width_t) audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) {
#endif
// Currently can't handle the case when the incoming audio has more bits per sample than the configured value
ESP_LOGE(TAG, "Audio streams with more bits per sample than the I2S speaker's configuration is not supported");
return ESP_ERR_NOT_SUPPORTED;
}
if (!this->parent_->try_lock()) {
ESP_LOGE(TAG, "Parent I2S bus not free");
return ESP_ERR_INVALID_STATE;
}
@ -575,6 +571,7 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
esp_err_t err =
i2s_driver_install(this->parent_->get_port(), &config, I2S_EVENT_QUEUE_COUNT, &this->i2s_event_queue_);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to install I2S legacy driver");
// Failed to install the driver, so unlock the I2S port
this->parent_->unlock();
return err;
@ -595,6 +592,7 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
if (err != ESP_OK) {
// Failed to set the data out pin, so uninstall the driver and unlock the I2S port
ESP_LOGE(TAG, "Failed to set the data out pin");
i2s_driver_uninstall(this->parent_->get_port());
this->parent_->unlock();
}
@ -605,10 +603,12 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
.dma_desc_num = DMA_BUFFERS_COUNT,
.dma_frame_num = dma_buffer_length,
.auto_clear = true,
.intr_priority = 3,
};
/* Allocate a new TX channel and get the handle of this channel */
esp_err_t err = i2s_new_channel(&chan_cfg, &this->tx_handle_, NULL);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to allocate new I2S channel");
this->parent_->unlock();
return err;
}
@ -652,7 +652,11 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
// per sample causes the audio to play too fast. Setting the ws_width to the configured slot bit width seems to
// make it play at the correct speed while sending more bits per slot.
if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO) {
std_slot_cfg.ws_width = static_cast<uint32_t>(this->slot_bit_width_);
uint32_t configured_bit_width = static_cast<uint32_t>(this->slot_bit_width_);
std_slot_cfg.ws_width = configured_bit_width;
if (configured_bit_width > 16) {
std_slot_cfg.msb_right = false;
}
}
#else
std_slot_cfg.slot_bit_width = this->slot_bit_width_;
@ -670,54 +674,56 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
err = i2s_channel_init_std_mode(this->tx_handle_, &std_cfg);
if (err != ESP_OK) {
ESP_LOGE(TAG, "Failed to initialize channel");
i2s_del_channel(this->tx_handle_);
this->tx_handle_ = nullptr;
this->parent_->unlock();
return err;
}
if (this->i2s_event_queue_ == nullptr) {
this->i2s_event_queue_ = xQueueCreate(1, sizeof(bool));
this->i2s_event_queue_ = xQueueCreate(I2S_EVENT_QUEUE_COUNT, sizeof(int64_t));
}
const i2s_event_callbacks_t callbacks = {
.on_send_q_ovf = i2s_overflow_cb,
};
i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this);
/* Before reading data, start the TX channel first */
i2s_channel_enable(this->tx_handle_);
if (err != ESP_OK) {
i2s_del_channel(this->tx_handle_);
this->parent_->unlock();
}
#endif
return err;
}
void I2SAudioSpeaker::delete_task_(size_t buffer_size) {
this->audio_ring_buffer_.reset(); // Releases ownership of the shared_ptr
#ifndef USE_I2S_LEGACY
bool IRAM_ATTR I2SAudioSpeaker::i2s_on_sent_cb(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx) {
int64_t now = esp_timer_get_time();
if (this->data_buffer_ != nullptr) {
RAMAllocator<uint8_t> allocator;
allocator.deallocate(this->data_buffer_, buffer_size);
this->data_buffer_ = nullptr;
BaseType_t need_yield1 = pdFALSE;
BaseType_t need_yield2 = pdFALSE;
BaseType_t need_yield3 = pdFALSE;
I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) user_ctx;
if (xQueueIsQueueFullFromISR(this_speaker->i2s_event_queue_)) {
// Queue is full, so discard the oldest event and set the warning flag to inform the user
int64_t dummy;
xQueueReceiveFromISR(this_speaker->i2s_event_queue_, &dummy, &need_yield1);
xEventGroupSetBitsFromISR(this_speaker->event_group_, SpeakerEventGroupBits::WARN_DROPPED_EVENT, &need_yield2);
}
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::STATE_STOPPED);
xQueueSendToBackFromISR(this_speaker->i2s_event_queue_, &now, &need_yield3);
this->task_created_ = false;
vTaskDelete(nullptr);
}
#ifndef USE_I2S_LEGACY
bool IRAM_ATTR I2SAudioSpeaker::i2s_overflow_cb(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx) {
I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) user_ctx;
bool overflow = true;
xQueueOverwrite(this_speaker->i2s_event_queue_, &overflow);
return false;
return need_yield1 | need_yield2 | need_yield3;
}
#endif
// Tears down the I2S output and releases the parent I2S bus.
// Legacy driver: uninstalls the driver on the parent's port.
// Channel-based driver: disables and deletes the TX channel, then clears the handle.
// In both cases the parent is unlocked afterwards (start_i2s_driver_ acquires it via try_lock).
void I2SAudioSpeaker::stop_i2s_driver_() {
#ifdef USE_I2S_LEGACY
i2s_driver_uninstall(this->parent_->get_port());
#else
// Disable the channel before deleting it, then drop the stale handle
i2s_channel_disable(this->tx_handle_);
i2s_del_channel(this->tx_handle_);
this->tx_handle_ = nullptr;
#endif
// Hand the bus back to the parent
this->parent_->unlock();
}
} // namespace i2s_audio
} // namespace esphome

View File

@ -72,70 +72,57 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp
protected:
/// @brief Function for the FreeRTOS task handling audio output.
/// After receiving the COMMAND_START signal, allocates space for the buffers, starts the I2S driver, and reads
/// audio from the ring buffer and writes audio to the I2S port. Stops immediately after receiving the COMMAND_STOP
/// signal and stops only after the ring buffer is empty after receiving the COMMAND_STOP_GRACEFULLY signal. Stops if
/// the ring buffer hasn't read data for more than timeout_ milliseconds. When stopping, it deallocates the buffers,
/// stops the I2S driver, unlocks the I2S port, and deletes the task. It communicates the state and any errors via
/// event_group_.
/// Allocates space for the buffers, reads audio from the ring buffer and writes audio to the I2S port. Stops
/// immediately after receiving the COMMAND_STOP signal and stops only after the ring buffer is empty after receiving
/// the COMMAND_STOP_GRACEFULLY signal. Stops if the ring buffer hasn't read data for more than timeout_ milliseconds.
/// When stopping, it deallocates the buffers. It communicates its state and any errors via ``event_group_``.
/// @param params I2SAudioSpeaker component
static void speaker_task(void *params);
/// @brief Sends a stop command to the speaker task via event_group_.
/// @brief Sends a stop command to the speaker task via ``event_group_``.
/// @param wait_on_empty If false, sends the COMMAND_STOP signal. If true, sends the COMMAND_STOP_GRACEFULLY signal.
void stop_(bool wait_on_empty);
/// @brief Sets the corresponding ERR_ESP event group bits.
/// @param err esp_err_t error code.
/// @return True if an ERR_ESP bit is set and false if err == ESP_OK
bool send_esp_err_to_event_group_(esp_err_t err);
#ifndef USE_I2S_LEGACY
static bool i2s_overflow_cb(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx);
/// @brief Callback function used to send playback timestamps to the speaker task.
/// @param handle (i2s_chan_handle_t)
/// @param event (i2s_event_data_t)
/// @param user_ctx (void*) User context pointer that the callback accesses
/// @return True if a higher priority task was interrupted
static bool i2s_on_sent_cb(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx);
#endif
/// @brief Allocates the data buffer and ring buffer
/// @param data_buffer_size Number of bytes to allocate for the data buffer.
/// @param ring_buffer_size Number of bytes to allocate for the ring buffer.
/// @return ESP_ERR_NO_MEM if either buffer fails to allocate
/// ESP_OK if successful
esp_err_t allocate_buffers_(size_t data_buffer_size, size_t ring_buffer_size);
/// @brief Starts the ESP32 I2S driver.
/// Attempts to lock the I2S port, starts the I2S driver using the passed in stream information, and sets the data out
/// pin. If it fails, it will unlock the I2S port and uninstall the driver, if necessary.
/// pin. If it fails, it will unlock the I2S port and uninstall the driver, if necessary.
/// @param audio_stream_info Stream information for the I2S driver.
/// @return ESP_ERR_NOT_ALLOWED if the I2S port can't play the incoming audio stream.
/// ESP_ERR_INVALID_STATE if the I2S port is already locked.
/// ESP_ERR_INVALID_ARG if installing the driver or setting the data outpin fails due to a parameter error.
/// ESP_ERR_INVALID_ARG if installing the driver or setting the data outpin fails due to a parameter error.
/// ESP_ERR_NO_MEM if the driver fails to install due to a memory allocation error.
/// ESP_FAIL if setting the data out pin fails due to an IO error ESP_OK if successful
/// ESP_FAIL if setting the data out pin fails due to an IO error
/// ESP_OK if successful
esp_err_t start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info);
/// @brief Deletes the speaker's task.
/// Deallocates the data_buffer_ and audio_ring_buffer_, if necessary, and deletes the task. Should only be called by
/// the speaker_task itself.
/// @param buffer_size The allocated size of the data_buffer_.
void delete_task_(size_t buffer_size);
/// @brief Stops the I2S driver and unlocks the I2S port
void stop_i2s_driver_();
TaskHandle_t speaker_task_handle_{nullptr};
EventGroupHandle_t event_group_{nullptr};
QueueHandle_t i2s_event_queue_;
uint8_t *data_buffer_;
std::shared_ptr<RingBuffer> audio_ring_buffer_;
std::weak_ptr<RingBuffer> audio_ring_buffer_;
uint32_t buffer_duration_ms_;
optional<uint32_t> timeout_;
bool task_created_{false};
bool pause_state_{false};
int16_t q15_volume_factor_{INT16_MAX};
size_t bytes_written_{0};
audio::AudioStreamInfo current_stream_info_; // The currently loaded driver's stream info
#ifdef USE_I2S_LEGACY
#if SOC_I2S_SUPPORTS_DAC
@ -148,8 +135,6 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp
std::string i2s_comm_fmt_;
i2s_chan_handle_t tx_handle_;
#endif
uint32_t accumulated_frames_written_{0};
};
} // namespace i2s_audio