[i2s_audio, mixer, resampler, speaker] Simplify duration played callback (#8703)

This commit is contained in:
Kevin Ahrendt 2025-05-06 23:42:59 -05:00 committed by GitHub
parent 75496849eb
commit e988762576
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 35 additions and 85 deletions

View File

@ -14,6 +14,8 @@
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
#include "esp_timer.h"
namespace esphome {
namespace i2s_audio {
@ -366,25 +368,15 @@ void I2SAudioSpeaker::speaker_task(void *params) {
bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
#endif
uint32_t write_timestamp = micros();
int64_t now = esp_timer_get_time();
if (bytes_written != bytes_to_write) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE);
}
bytes_read -= bytes_written;
this_speaker->accumulated_frames_written_ += audio_stream_info.bytes_to_frames(bytes_written);
const uint32_t new_playback_ms =
audio_stream_info.frames_to_milliseconds_with_remainder(&this_speaker->accumulated_frames_written_);
const uint32_t remainder_us =
audio_stream_info.frames_to_microseconds(this_speaker->accumulated_frames_written_);
uint32_t pending_frames =
audio_stream_info.bytes_to_frames(bytes_read + this_speaker->audio_ring_buffer_->available());
const uint32_t pending_ms = audio_stream_info.frames_to_milliseconds_with_remainder(&pending_frames);
this_speaker->audio_output_callback_(new_playback_ms, remainder_us, pending_ms, write_timestamp);
this_speaker->audio_output_callback_(audio_stream_info.bytes_to_frames(bytes_written),
now + dma_buffers_duration_ms * 1000);
tx_dma_underflow = false;
last_data_received_time = millis();

View File

@ -53,14 +53,15 @@ void SourceSpeaker::dump_config() {
}
void SourceSpeaker::setup() {
this->parent_->get_output_speaker()->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
uint32_t personal_playback_ms = std::min(new_playback_ms, this->pending_playback_ms_);
if (personal_playback_ms > 0) {
this->pending_playback_ms_ -= personal_playback_ms;
this->audio_output_callback_(personal_playback_ms, remainder_us, this->pending_playback_ms_, write_timestamp);
}
});
this->parent_->get_output_speaker()->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) {
  // The mixed output may contain audio that did not come from this SourceSpeaker, so only credit as many frames as
  // this speaker still has pending.
  // NOTE(review): pending_playback_frames_ is also incremented from the mixer task; confirm that concurrent access
  // with this callback is acceptable.
  const uint32_t credited_frames = std::min(new_frames, this->pending_playback_frames_);
  this->pending_playback_frames_ -= credited_frames;
  if (credited_frames == 0) {
    // Nothing from this speaker was part of the reported frames; stay silent.
    return;
  }
  this->audio_output_callback_(credited_frames, write_timestamp);
});
}
void SourceSpeaker::loop() {
@ -153,6 +154,7 @@ esp_err_t SourceSpeaker::start_() {
}
}
this->pending_playback_frames_ = 0; // reset
return this->parent_->start(this->audio_stream_info_);
}
@ -542,11 +544,7 @@ void MixerSpeaker::audio_mixer_task(void *params) {
// Update source speaker buffer length
transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix));
speakers_with_data[0]->accumulated_frames_read_ += frames_to_mix;
// Add new audio duration to the source speaker pending playback
speakers_with_data[0]->pending_playback_ms_ +=
active_stream_info.frames_to_milliseconds_with_remainder(&speakers_with_data[0]->accumulated_frames_read_);
speakers_with_data[0]->pending_playback_frames_ += frames_to_mix;
// Update output transfer buffer length
output_transfer_buffer->increase_buffer_length(
@ -586,10 +584,6 @@ void MixerSpeaker::audio_mixer_task(void *params) {
reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
this_mixer->audio_stream_info_.value(), frames_to_mix);
speakers_with_data[i]->pending_playback_ms_ +=
speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
&speakers_with_data[i]->accumulated_frames_read_);
if (i != transfer_buffers_with_data.size() - 1) {
// Need to mix more streams together, point primary buffer and stream info to the already mixed output
primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end());
@ -601,11 +595,7 @@ void MixerSpeaker::audio_mixer_task(void *params) {
for (int i = 0; i < transfer_buffers_with_data.size(); ++i) {
transfer_buffers_with_data[i]->decrease_buffer_length(
speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix));
speakers_with_data[i]->accumulated_frames_read_ += frames_to_mix;
speakers_with_data[i]->pending_playback_ms_ +=
speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
&speakers_with_data[i]->accumulated_frames_read_);
speakers_with_data[i]->pending_playback_frames_ += frames_to_mix;
}
// Update output transfer buffer length

View File

@ -114,9 +114,7 @@ class SourceSpeaker : public speaker::Speaker, public Component {
uint32_t ducking_transition_samples_remaining_{0};
uint32_t samples_per_ducking_step_{0};
uint32_t accumulated_frames_read_{0};
uint32_t pending_playback_ms_{0};
uint32_t pending_playback_frames_{0};
};
class MixerSpeaker : public Component {

View File

@ -43,13 +43,18 @@ void ResamplerSpeaker::setup() {
return;
}
this->output_speaker_->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
int32_t adjustment = this->playback_differential_ms_;
this->playback_differential_ms_ -= adjustment;
int32_t adjusted_playback_ms = static_cast<int32_t>(new_playback_ms) + adjustment;
this->audio_output_callback_(adjusted_playback_ms, remainder_us, pending_ms, write_timestamp);
});
this->output_speaker_->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) {
  // Relay the output speaker's report to this speaker's own callbacks. new_frames is counted at the target (output)
  // sample rate, so it is rescaled to the source sample rate whenever the two rates differ.
  if (this->audio_stream_info_.get_sample_rate() != this->target_stream_info_.get_sample_rate()) {
    // Convert the number of frames from the target sample rate to the source sample rate. Track the remainder to
    // avoid losing frames from integer division truncation.
    // new_frames is widened to 64 bits BEFORE the multiplication; otherwise the product of two 32-bit operands
    // could wrap around before it is stored in the 64-bit numerator.
    const uint64_t numerator =
        static_cast<uint64_t>(new_frames) * this->audio_stream_info_.get_sample_rate() + this->callback_remainder_;
    const uint64_t denominator = this->target_stream_info_.get_sample_rate();
    this->callback_remainder_ = numerator % denominator;
    this->audio_output_callback_(numerator / denominator, write_timestamp);
  } else {
    // Same rate on both sides: frame counts are already in the right unit.
    this->audio_output_callback_(new_frames, write_timestamp);
  }
});
}
void ResamplerSpeaker::loop() {
@ -283,7 +288,6 @@ void ResamplerSpeaker::resample_task(void *params) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED);
}
this_resampler->playback_differential_ms_ = 0;
while (err == ESP_OK) {
uint32_t event_bits = xEventGroupGetBits(this_resampler->event_group_);
@ -295,8 +299,6 @@ void ResamplerSpeaker::resample_task(void *params) {
int32_t ms_differential = 0;
audio::AudioResamplerState resampler_state = resampler->resample(false, &ms_differential);
this_resampler->playback_differential_ms_ += ms_differential;
if (resampler_state == audio::AudioResamplerState::FINISHED) {
break;
} else if (resampler_state == audio::AudioResamplerState::FAILED) {

View File

@ -100,7 +100,7 @@ class ResamplerSpeaker : public Component, public speaker::Speaker {
uint32_t buffer_duration_ms_;
int32_t playback_differential_ms_{0};
uint64_t callback_remainder_{0};
};
} // namespace resampler

View File

@ -106,16 +106,6 @@ void SpeakerMediaPlayer::setup() {
ESP_LOGE(TAG, "Failed to create media pipeline");
this->mark_failed();
}
// Setup callback to track the duration of audio played by the media pipeline
this->media_speaker_->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
this->playback_ms_ += new_playback_ms;
this->remainder_us_ = remainder_us;
this->pending_ms_ = pending_ms;
this->last_audio_write_timestamp_ = write_timestamp;
this->playback_us_ = this->playback_ms_ * 1000 + this->remainder_us_;
});
}
ESP_LOGI(TAG, "Set up speaker media player");
@ -321,7 +311,6 @@ void SpeakerMediaPlayer::loop() {
AudioPipelineState old_media_pipeline_state = this->media_pipeline_state_;
if (this->media_pipeline_ != nullptr) {
this->media_pipeline_state_ = this->media_pipeline_->process_state();
this->decoded_playback_ms_ = this->media_pipeline_->get_playback_ms();
}
if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) {
@ -379,13 +368,6 @@ void SpeakerMediaPlayer::loop() {
} else if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) {
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
} else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) {
// Reset playback durations
this->decoded_playback_ms_ = 0;
this->playback_us_ = 0;
this->playback_ms_ = 0;
this->remainder_us_ = 0;
this->pending_ms_ = 0;
if (!media_playlist_.empty()) {
uint32_t timeout_ms = 0;
if (old_media_pipeline_state == AudioPipelineState::PLAYING) {

View File

@ -73,10 +73,6 @@ class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer {
void play_file(audio::AudioFile *media_file, bool announcement, bool enqueue);
uint32_t get_playback_ms() const { return this->playback_ms_; }
uint32_t get_playback_us() const { return this->playback_us_; }
uint32_t get_decoded_playback_ms() const { return this->decoded_playback_ms_; }
void set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms);
protected:
@ -141,13 +137,6 @@ class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer {
Trigger<> *mute_trigger_ = new Trigger<>();
Trigger<> *unmute_trigger_ = new Trigger<>();
Trigger<float> *volume_trigger_ = new Trigger<float>();
uint32_t decoded_playback_ms_{0};
uint32_t playback_us_{0};
uint32_t playback_ms_{0};
uint32_t remainder_us_{0};
uint32_t pending_ms_{0};
uint32_t last_audio_write_timestamp_{0};
};
} // namespace speaker

View File

@ -104,12 +104,9 @@ class Speaker {
/// Callback function for reporting the amount of audio written to the speaker since the last callback.
/// Parameters:
/// - Duration in milliseconds. Never rounded and should always be less than or equal to the actual duration.
/// - Remainder duration in microseconds. Rounded duration after subtracting the previous parameter from the actual
/// duration.
/// - Duration of remaining, unwritten audio buffered in the speaker in milliseconds.
/// - System time in microseconds when the last write was completed.
void add_audio_output_callback(std::function<void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback) {
/// - Number of audio frames covered by this report (frames handed to the output since the previous callback)
/// - Timestamp in microseconds associated with those frames. The I2S speaker passes esp_timer_get_time(), sampled
///   right after the write, plus the duration of its DMA buffers.
/// The callback is moved into audio_output_callback_, so the caller's std::function is consumed.
void add_audio_output_callback(std::function<void(uint32_t, int64_t)> &&callback) {
this->audio_output_callback_.add(std::move(callback));
}
@ -123,7 +120,7 @@ class Speaker {
audio_dac::AudioDac *audio_dac_{nullptr};
#endif
CallbackManager<void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_{};
CallbackManager<void(uint32_t, int64_t)> audio_output_callback_{};
};
} // namespace speaker