mirror of
https://github.com/esphome/esphome.git
synced 2025-07-28 14:16:40 +00:00
[audio, microphone] - Allow MicrophoneSource to passively capture/optimization (#8732)
This commit is contained in:
parent
8399d894c1
commit
bec9d91419
@ -135,7 +135,7 @@ const char *audio_file_type_to_string(AudioFileType file_type);
|
||||
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
|
||||
size_t samples_to_scale);
|
||||
|
||||
/// @brief Unpacks a quantized audio sample into a Q31 fixed point number.
|
||||
/// @brief Unpacks a quantized audio sample into a Q31 fixed-point number.
|
||||
/// @param data Pointer to uint8_t array containing the audio sample
|
||||
/// @param bytes_per_sample The number of bytes per sample
|
||||
/// @return Q31 sample
|
||||
@ -160,5 +160,28 @@ inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_
|
||||
return sample;
|
||||
}
|
||||
|
||||
/// @brief Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
/// Packs the most significant bits - no dithering is applied. Bytes are written least significant byte first.
/// If ``bytes_per_sample`` is not 1, 2, 3, or 4, nothing is written.
/// @param sample Q31 fixed-point number to pack
/// @param data Pointer to data array to store; ``bytes_per_sample`` bytes are written starting at ``data[0]``
/// @param bytes_per_sample The audio data's bytes per sample
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  if (bytes_per_sample < 1 || bytes_per_sample > 4) {
    return;
  }

  // Extract bytes from the unsigned bit pattern: right-shifting a negative signed value is
  // implementation-defined before C++20, while unsigned shifts are always well defined.
  const uint32_t bits = static_cast<uint32_t>(sample);

  // Skip the (4 - bytes_per_sample) least significant bytes so only the most significant ones are kept.
  const size_t first_shift = 8 * (4 - bytes_per_sample);
  for (size_t i = 0; i < bytes_per_sample; ++i) {
    data[i] = static_cast<uint8_t>(bits >> (first_shift + 8 * i));
  }
}
|
||||
|
||||
} // namespace audio
|
||||
} // namespace esphome
|
||||
|
@ -162,13 +162,22 @@ def final_validate_microphone_source_schema(
|
||||
return _validate_audio_compatability
|
||||
|
||||
|
||||
async def microphone_source_to_code(config):
|
||||
async def microphone_source_to_code(config, passive=False):
|
||||
"""Creates a MicrophoneSource variable for codegen.
|
||||
|
||||
Setting passive to true makes the MicrophoneSource never start/stop the microphone; it only receives audio when another component has actively started the Microphone. If false, then the microphone needs to be explicitly started/stopped.
|
||||
|
||||
Args:
|
||||
config (Schema): Created with `microphone_source_schema` specifying bits per sample, channels, and gain factor
|
||||
passive (bool): Enable passive mode for the MicrophoneSource
|
||||
"""
|
||||
mic = await cg.get_variable(config[CONF_MICROPHONE])
|
||||
mic_source = cg.new_Pvariable(
|
||||
config[CONF_ID],
|
||||
mic,
|
||||
config[CONF_BITS_PER_SAMPLE],
|
||||
config[CONF_GAIN_FACTOR],
|
||||
passive,
|
||||
)
|
||||
for channel in config[CONF_CHANNELS]:
|
||||
cg.add(mic_source.add_channel(channel))
|
||||
|
@ -6,12 +6,10 @@ namespace microphone {
|
||||
static const int32_t Q25_MAX_VALUE = (1 << 25) - 1;
|
||||
static const int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE;
|
||||
|
||||
static const uint32_t HISTORY_VALUES = 32;
|
||||
|
||||
void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
|
||||
std::function<void(const std::vector<uint8_t> &)> filtered_callback =
|
||||
[this, data_callback](const std::vector<uint8_t> &data) {
|
||||
if (this->enabled_) {
|
||||
if (this->enabled_ || this->passive_) {
|
||||
if (this->processed_samples_.use_count() == 0) {
|
||||
// Create vector if it's unused
|
||||
this->processed_samples_ = std::make_shared<std::vector<uint8_t>>();
|
||||
@ -32,13 +30,14 @@ audio::AudioStreamInfo MicrophoneSource::get_audio_stream_info() {
|
||||
}
|
||||
|
||||
void MicrophoneSource::start() {
|
||||
if (!this->enabled_) {
|
||||
if (!this->enabled_ && !this->passive_) {
|
||||
this->enabled_ = true;
|
||||
this->mic_->start();
|
||||
}
|
||||
}
|
||||
|
||||
void MicrophoneSource::stop() {
|
||||
if (this->enabled_) {
|
||||
if (this->enabled_ && !this->passive_) {
|
||||
this->enabled_ = false;
|
||||
this->mic_->stop();
|
||||
this->processed_samples_.reset();
|
||||
@ -63,8 +62,9 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
|
||||
const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
|
||||
const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();
|
||||
|
||||
filtered_data.reserve(target_bytes_per_frame * total_frames);
|
||||
filtered_data.resize(0);
|
||||
filtered_data.resize(target_bytes_per_frame * total_frames);
|
||||
|
||||
uint8_t *current_data = filtered_data.data();
|
||||
|
||||
for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) {
|
||||
for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) {
|
||||
@ -82,26 +82,10 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
|
||||
// Clamp ``sample`` in case gain multiplication overflows 25 bits
|
||||
sample = clamp<int32_t>(sample, Q25_MIN_VALUE, Q25_MAX_VALUE); // Q25
|
||||
|
||||
// Copy ``target_bytes_per_sample`` bytes to the output buffer.
|
||||
if (target_bytes_per_sample == 1) {
|
||||
sample >>= 18; // Q25 -> Q7
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample));
|
||||
} else if (target_bytes_per_sample == 2) {
|
||||
sample >>= 10; // Q25 -> Q15
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
|
||||
} else if (target_bytes_per_sample == 3) {
|
||||
sample >>= 2; // Q25 -> Q23
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
|
||||
} else {
|
||||
sample *= (1 << 6); // Q25 -> Q31
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
|
||||
filtered_data.push_back(static_cast<uint8_t>(sample >> 24));
|
||||
}
|
||||
sample *= (1 << 6); // Q25 -> Q31
|
||||
|
||||
audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample);
|
||||
current_data = current_data + target_bytes_per_sample;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,8 +35,8 @@ class MicrophoneSource {
|
||||
* Note that this class cannot convert sample rates!
|
||||
*/
|
||||
public:
|
||||
/// @brief Constructs a MicrophoneSource bound to a microphone.
/// @param mic Microphone this source is attached to
/// @param bits_per_sample Bits per sample this source is configured with
/// @param gain_factor Gain factor this source is configured with
/// @param passive If true, start() and stop() never start/stop the microphone. Defaults to false so existing
///                three-argument callers keep compiling.
MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor, bool passive = false)
    : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor), passive_(passive) {}
|
||||
|
||||
/// @brief Enables a channel to be processed through the callback.
|
||||
///
|
||||
@ -59,8 +59,8 @@ class MicrophoneSource {
|
||||
|
||||
void start();
|
||||
void stop();
|
||||
bool is_running() const { return (this->mic_->is_running() && this->enabled_); }
|
||||
bool is_stopped() const { return !this->enabled_; }
|
||||
bool is_running() const { return (this->mic_->is_running() && (this->enabled_ || this->passive_)); }
|
||||
bool is_stopped() const { return !this->is_running(); };
|
||||
|
||||
protected:
|
||||
void process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data);
|
||||
@ -72,6 +72,7 @@ class MicrophoneSource {
|
||||
std::bitset<8> channels_;
|
||||
int32_t gain_factor_;
|
||||
bool enabled_{false};
|
||||
bool passive_{false};
|
||||
};
|
||||
|
||||
} // namespace microphone
|
||||
|
Loading…
x
Reference in New Issue
Block a user