diff --git a/esphome/components/audio/audio.h b/esphome/components/audio/audio.h
index 2c556c68e2..95c31872e3 100644
--- a/esphome/components/audio/audio.h
+++ b/esphome/components/audio/audio.h
@@ -135,7 +135,7 @@ const char *audio_file_type_to_string(AudioFileType file_type);
 void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
                          size_t samples_to_scale);
 
-/// @brief Unpacks a quantized audio sample into a Q31 fixed point number.
+/// @brief Unpacks a quantized audio sample into a Q31 fixed-point number.
 /// @param data Pointer to uint8_t array containing the audio sample
 /// @param bytes_per_sample The number of bytes per sample
 /// @return Q31 sample
@@ -160,5 +160,28 @@ inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_
   return sample;
 }
 
+/// @brief Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
+/// Packs the most significant bits in little-endian byte order - no dithering is applied.
+/// @param sample Q31 fixed-point number to pack
+/// @param data Pointer to the output buffer; must have room for at least ``bytes_per_sample`` bytes
+/// @param bytes_per_sample The audio data's bytes per sample (1 to 4; any other value writes nothing)
+inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
+  if (bytes_per_sample == 1) {
+    data[0] = static_cast<uint8_t>(sample >> 24);
+  } else if (bytes_per_sample == 2) {
+    data[0] = static_cast<uint8_t>(sample >> 16);
+    data[1] = static_cast<uint8_t>(sample >> 24);
+  } else if (bytes_per_sample == 3) {
+    data[0] = static_cast<uint8_t>(sample >> 8);
+    data[1] = static_cast<uint8_t>(sample >> 16);
+    data[2] = static_cast<uint8_t>(sample >> 24);
+  } else if (bytes_per_sample == 4) {
+    data[0] = static_cast<uint8_t>(sample);
+    data[1] = static_cast<uint8_t>(sample >> 8);
+    data[2] = static_cast<uint8_t>(sample >> 16);
+    data[3] = static_cast<uint8_t>(sample >> 24);
+  }
+}
+
 }  // namespace audio
 }  // namespace esphome
diff --git a/esphome/components/microphone/__init__.py b/esphome/components/microphone/__init__.py
index 2fda99af05..29bdcfa3f3 100644
--- a/esphome/components/microphone/__init__.py
+++ b/esphome/components/microphone/__init__.py
@@ -162,13 +162,22 @@ def final_validate_microphone_source_schema(
     return _validate_audio_compatability
 
 
-async def microphone_source_to_code(config):
+async def microphone_source_to_code(config, passive=False):
+    """Creates a MicrophoneSource variable for codegen.
+
+    When passive is True, the MicrophoneSource never starts or stops the microphone itself; it only receives audio while another component has actively started the microphone. When False, the microphone needs to be explicitly started/stopped through the MicrophoneSource.
+
+    Args:
+        config (Schema): Created with `microphone_source_schema` specifying bits per sample, channels, and gain factor
+        passive (bool): Enable passive mode for the MicrophoneSource
+    """
     mic = await cg.get_variable(config[CONF_MICROPHONE])
     mic_source = cg.new_Pvariable(
         config[CONF_ID],
         mic,
         config[CONF_BITS_PER_SAMPLE],
         config[CONF_GAIN_FACTOR],
+        passive,
     )
     for channel in config[CONF_CHANNELS]:
         cg.add(mic_source.add_channel(channel))
diff --git a/esphome/components/microphone/microphone_source.cpp b/esphome/components/microphone/microphone_source.cpp
index 1ea0deb22b..00efcf22a1 100644
--- a/esphome/components/microphone/microphone_source.cpp
+++ b/esphome/components/microphone/microphone_source.cpp
@@ -6,12 +6,10 @@ namespace microphone {
 static const int32_t Q25_MAX_VALUE = (1 << 25) - 1;
 static const int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE;
 
-static const uint32_t HISTORY_VALUES = 32;
-
 void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
   std::function<void(const std::vector<uint8_t> &)> filtered_callback =
       [this, data_callback](const std::vector<uint8_t> &data) {
-        if (this->enabled_) {
+        if (this->enabled_ || this->passive_) {
           if (this->processed_samples_.use_count() == 0) {
             // Create vector if its unused
             this->processed_samples_ = std::make_shared<std::vector<uint8_t>>();
@@ -32,13 +30,14 @@ audio::AudioStreamInfo MicrophoneSource::get_audio_stream_info() {
 }
 
 void MicrophoneSource::start() {
-  if (!this->enabled_) {
+  if (!this->enabled_ && !this->passive_) {
     this->enabled_ = true;
     this->mic_->start();
   }
 }
+
 void MicrophoneSource::stop() {
-  if (this->enabled_) {
+  if (this->enabled_ && !this->passive_) {
     this->enabled_ = false;
     this->mic_->stop();
     this->processed_samples_.reset();
@@ -63,8 +62,9 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
   const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
   const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();
 
-  filtered_data.reserve(target_bytes_per_frame * total_frames);
-  filtered_data.resize(0);
+  filtered_data.resize(target_bytes_per_frame * total_frames);
+
+  uint8_t *current_data = filtered_data.data();
 
   for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) {
     for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) {
@@ -82,26 +82,10 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec
         // Clamp ``sample`` in case gain multiplication overflows 25 bits
         sample = clamp(sample, Q25_MIN_VALUE, Q25_MAX_VALUE);  // Q25
 
-        // Copy ``target_bytes_per_sample`` bytes to the output buffer.
-        if (target_bytes_per_sample == 1) {
-          sample >>= 18;  // Q25 -> Q7
-          filtered_data.push_back(static_cast<uint8_t>(sample));
-        } else if (target_bytes_per_sample == 2) {
-          sample >>= 10;  // Q25 -> Q15
-          filtered_data.push_back(static_cast<uint8_t>(sample));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
-        } else if (target_bytes_per_sample == 3) {
-          sample >>= 2;  // Q25 -> Q23
-          filtered_data.push_back(static_cast<uint8_t>(sample));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
-        } else {
-          sample *= (1 << 6);  // Q25 -> Q31
-          filtered_data.push_back(static_cast<uint8_t>(sample));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 8));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 16));
-          filtered_data.push_back(static_cast<uint8_t>(sample >> 24));
-        }
+        sample *= (1 << 6);  // Q25 -> Q31
+
+        audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample);
+        current_data += target_bytes_per_sample;
       }
     }
   }
diff --git a/esphome/components/microphone/microphone_source.h b/esphome/components/microphone/microphone_source.h
index 7f8a37b360..228f2d9dc3 100644
--- a/esphome/components/microphone/microphone_source.h
+++ b/esphome/components/microphone/microphone_source.h
@@ -35,8 +35,8 @@ class MicrophoneSource {
    * Note that this class cannot convert sample rates!
    */
  public:
-  MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor)
-      : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {}
+  MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor, bool passive)
+      : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor), passive_(passive) {}
 
   /// @brief Enables a channel to be processed through the callback.
   ///
@@ -59,8 +59,8 @@ class MicrophoneSource {
 
   void start();
   void stop();
-  bool is_running() const { return (this->mic_->is_running() && this->enabled_); }
-  bool is_stopped() const { return !this->enabled_; }
+  bool is_running() const { return (this->mic_->is_running() && (this->enabled_ || this->passive_)); }
+  bool is_stopped() const { return !this->is_running(); }
 
  protected:
   void process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data);
@@ -72,6 +72,7 @@ class MicrophoneSource {
   std::bitset<8> channels_;
   int32_t gain_factor_;
   bool enabled_{false};
+  bool passive_{false};
 };
 
 }  // namespace microphone