[audio, microphone] - Allow MicrophoneSource to passively capture/optimization (#8732)

This commit is contained in:
Kevin Ahrendt 2025-05-09 16:54:33 -05:00 committed by GitHub
parent 8399d894c1
commit bec9d91419
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 50 additions and 33 deletions

View File

@ -135,7 +135,7 @@ const char *audio_file_type_to_string(AudioFileType file_type);
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
size_t samples_to_scale);
/// @brief Unpacks a quantized audio sample into a Q31 fixed point number.
/// @brief Unpacks a quantized audio sample into a Q31 fixed-point number.
/// @param data Pointer to uint8_t array containing the audio sample
/// @param bytes_per_sample The number of bytes per sample
/// @return Q31 sample
@ -160,5 +160,28 @@ inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_
return sample;
}
/// @brief Stores the most significant bytes of a Q31 fixed-point value as an audio sample.
/// The top ``bytes_per_sample`` bytes of ``sample`` are written least significant byte first. The low-order
/// bits that do not fit are dropped; no dithering is applied. Widths other than 1 to 4 bytes leave ``data``
/// untouched.
/// @param sample Q31 fixed-point number to pack
/// @param data Pointer to the destination array; must have room for ``bytes_per_sample`` bytes
/// @param bytes_per_sample The audio data's bytes per sample
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  if (bytes_per_sample == 0 || bytes_per_sample > 4) {
    return;
  }
  // Skip the low-order bytes that are discarded, then emit one byte per 8-bit step up to bit 31.
  size_t shift = 8 * (4 - bytes_per_sample);
  for (size_t index = 0; index < bytes_per_sample; ++index) {
    data[index] = static_cast<uint8_t>(sample >> shift);
    shift += 8;
  }
}
} // namespace audio
} // namespace esphome

View File

@ -162,13 +162,22 @@ def final_validate_microphone_source_schema(
return _validate_audio_compatability
async def microphone_source_to_code(config):
async def microphone_source_to_code(config, passive=False):
"""Creates a MicrophoneSource variable for codegen.
Setting passive to true means the MicrophoneSource never starts or stops the microphone; it only receives audio while another component has actively started the microphone. If false, the microphone needs to be explicitly started/stopped.
Args:
config (Schema): Created with `microphone_source_schema` specifying bits per sample, channels, and gain factor
passive (bool): Enable passive mode for the MicrophoneSource
"""
mic = await cg.get_variable(config[CONF_MICROPHONE])
mic_source = cg.new_Pvariable(
config[CONF_ID],
mic,
config[CONF_BITS_PER_SAMPLE],
config[CONF_GAIN_FACTOR],
passive,
)
for channel in config[CONF_CHANNELS]:
cg.add(mic_source.add_channel(channel))

View File

@ -6,12 +6,10 @@ namespace microphone {
static const int32_t Q25_MAX_VALUE = (1 << 25) - 1;
static const int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE;
static const uint32_t HISTORY_VALUES = 32;
void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
std::function<void(const std::vector<uint8_t> &)> filtered_callback =
[this, data_callback](const std::vector<uint8_t> &data) {
if (this->enabled_) {
if (this->enabled_ || this->passive_) {
if (this->processed_samples_.use_count() == 0) {
// Create vector if its unused
this->processed_samples_ = std::make_shared<std::vector<uint8_t>>();
@ -32,13 +30,14 @@ audio::AudioStreamInfo MicrophoneSource::get_audio_stream_info() {
}
void MicrophoneSource::start() {
if (!this->enabled_) {
if (!this->enabled_ && !this->passive_) {
this->enabled_ = true;
this->mic_->start();
}
}
void MicrophoneSource::stop() {
if (this->enabled_) {
if (this->enabled_ && !this->passive_) {
this->enabled_ = false;
this->mic_->stop();
this->processed_samples_.reset();
@ -63,8 +62,9 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();
filtered_data.reserve(target_bytes_per_frame * total_frames);
filtered_data.resize(0);
filtered_data.resize(target_bytes_per_frame * total_frames);
uint8_t *current_data = filtered_data.data();
for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) {
for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) {
@ -82,26 +82,10 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
// Clamp ``sample`` in case gain multiplication overflows 25 bits
sample = clamp<int32_t>(sample, Q25_MIN_VALUE, Q25_MAX_VALUE); // Q25
// Copy ``target_bytes_per_sample`` bytes to the output buffer.
if (target_bytes_per_sample == 1) {
sample >>= 18; // Q25 -> Q7
filtered_data.push_back(static_cast<uint8_t>(sample));
} else if (target_bytes_per_sample == 2) {
sample >>= 10; // Q25 -> Q15
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
} else if (target_bytes_per_sample == 3) {
sample >>= 2; // Q25 -> Q23
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
} else {
sample *= (1 << 6); // Q25 -> Q31
filtered_data.push_back(static_cast<uint8_t>(sample));
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
filtered_data.push_back(static_cast<uint8_t>(sample >> 24));
}
sample *= (1 << 6); // Q25 -> Q31
audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample);
current_data = current_data + target_bytes_per_sample;
}
}
}

View File

@ -35,8 +35,8 @@ class MicrophoneSource {
* Note that this class cannot convert sample rates!
*/
public:
MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor)
: mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {}
/// @brief Constructs a source bound to a microphone; every argument is stored in the matching member.
/// @param mic Microphone this source forwards ``start()``/``stop()`` to and queries in ``is_running()``
/// @param bits_per_sample Bits per sample of the processed output (rounded up to whole bytes when processing)
/// @param gain_factor Gain setting stored for audio processing (presumably a sample multiplier - confirm)
/// @param passive When true, ``start()`` and ``stop()`` leave the microphone untouched and incoming audio is
///                processed even though the source was never enabled
MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor, bool passive)
: mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor), passive_(passive) {}
/// @brief Enables a channel to be processed through the callback.
///
@ -59,8 +59,8 @@ class MicrophoneSource {
void start();
void stop();
bool is_running() const { return (this->mic_->is_running() && this->enabled_); }
bool is_stopped() const { return !this->enabled_; }
/// @brief Reports whether this source is in a running state.
/// @return True when the wrapped microphone reports it is running and this source is either enabled or passive.
bool is_running() const {
  if (!this->mic_->is_running()) {
    return false;
  }
  return this->enabled_ || this->passive_;
}
/// @brief Reports whether this source is not running.
/// @return True whenever ``is_running()`` returns false.
bool is_stopped() const { return !this->is_running(); }
protected:
void process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data);
@ -72,6 +72,7 @@ class MicrophoneSource {
std::bitset<8> channels_;
int32_t gain_factor_;
bool enabled_{false};
bool passive_{false};
};
} // namespace microphone