diff --git a/CODEOWNERS b/CODEOWNERS index 73973f420f..6f5eae1a9c 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -278,7 +278,7 @@ esphome/components/mdns/* @esphome/core esphome/components/media_player/* @jesserockz esphome/components/micro_wake_word/* @jesserockz @kahrendt esphome/components/micronova/* @jorre05 -esphome/components/microphone/* @jesserockz +esphome/components/microphone/* @jesserockz @kahrendt esphome/components/mics_4514/* @jesserockz esphome/components/midea/* @dudanov esphome/components/midea_ir/* @dudanov diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py index f8ec8cbd85..9f08c81e77 100644 --- a/esphome/components/audio/__init__.py +++ b/esphome/components/audio/__init__.py @@ -48,6 +48,12 @@ def set_stream_limits( min_sample_rate: int = _UNDEF, max_sample_rate: int = _UNDEF, ): + """Sets the limits for the audio stream that audio component can handle + + When the component sinks audio (e.g., a speaker), these indicate the limits to the audio it can receive. + When the component sources audio (e.g., a microphone), these indicate the limits to the audio it can send. + """ + def set_limits_in_config(config): if min_bits_per_sample is not _UNDEF: config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample @@ -69,43 +75,87 @@ def final_validate_audio_schema( name: str, *, audio_device: str, - bits_per_sample: int, - channels: int, - sample_rate: int, + bits_per_sample: int = _UNDEF, + channels: int = _UNDEF, + sample_rate: int = _UNDEF, + enabled_channels: list[int] = _UNDEF, + audio_device_issue: bool = False, ): + """Validates audio compatibility when passed between different components. + + The component derived from ``AUDIO_COMPONENT_SCHEMA`` should call ``set_stream_limits`` in a validator to specify its compatible settings + + - If audio_device_issue is True, then the error message indicates the user should adjust the AUDIO_COMPONENT_SCHEMA derived component's configuration to match the values passed to this function + - If audio_device_issue is False, then the error message indicates the user should adjust the configuration of the component calling this function, as it falls out of the valid stream limits + + Args: + name (str): Friendly name of the component calling this function with an audio component to validate + audio_device (str): The configuration parameter name that contains the ID of an AUDIO_COMPONENT_SCHEMA derived component to validate against + bits_per_sample (int, optional): The desired bits per sample + channels (int, optional): The desired number of channels + sample_rate (int, optional): The desired sample rate + enabled_channels (list[int], optional): The desired enabled channels + audio_device_issue (bool, optional): Format the error message to indicate the problem is in the configuration for the ``audio_device`` component. Defaults to False. + """ + def validate_audio_compatiblity(audio_config): audio_schema = {} - try: - cv.int_range( - min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE), - max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE), - )(bits_per_sample) - except cv.Invalid as exc: - raise cv.Invalid( - f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}" - ) from exc + if bits_per_sample is not _UNDEF: + try: + cv.int_range( + min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE), + max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE), + )(bits_per_sample) + except cv.Invalid as exc: + if audio_device_issue: + error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {bits_per_sample} bits per sample." + else: + error_string = f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}" + raise cv.Invalid(error_string) from exc - try: - cv.int_range( - min=audio_config.get(CONF_MIN_CHANNELS), - max=audio_config.get(CONF_MAX_CHANNELS), - )(channels) - except cv.Invalid as exc: - raise cv.Invalid( - f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}" - ) from exc + if channels is not _UNDEF: + try: + cv.int_range( + min=audio_config.get(CONF_MIN_CHANNELS), + max=audio_config.get(CONF_MAX_CHANNELS), + )(channels) + except cv.Invalid as exc: + if audio_device_issue: + error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {channels} channels." + else: + error_string = f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}" + raise cv.Invalid(error_string) from exc - try: - cv.int_range( - min=audio_config.get(CONF_MIN_SAMPLE_RATE), - max=audio_config.get(CONF_MAX_SAMPLE_RATE), - )(sample_rate) - return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config) - except cv.Invalid as exc: - raise cv.Invalid( - f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}" - ) from exc + if sample_rate is not _UNDEF: + try: + cv.int_range( + min=audio_config.get(CONF_MIN_SAMPLE_RATE), + max=audio_config.get(CONF_MAX_SAMPLE_RATE), + )(sample_rate) + except cv.Invalid as exc: + if audio_device_issue: + error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires a {sample_rate} sample rate." + else: + error_string = f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}" + raise cv.Invalid(error_string) from exc + + if enabled_channels is not _UNDEF: + for channel in enabled_channels: + try: + # Channels are 0-indexed + cv.int_range( + min=0, + max=audio_config.get(CONF_MAX_CHANNELS) - 1, + )(channel) + except cv.Invalid as exc: + if audio_device_issue: + error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires channel {channel}." + else: + error_string = f"Invalid configuration for the {name} component. Enabled channel {channel} {str(exc)}" + raise cv.Invalid(error_string) from exc + + return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config) return cv.Schema( { diff --git a/esphome/components/audio/audio_resampler.cpp b/esphome/components/audio/audio_resampler.cpp index a7621225a1..20d246f1e0 100644 --- a/esphome/components/audio/audio_resampler.cpp +++ b/esphome/components/audio/audio_resampler.cpp @@ -4,6 +4,8 @@ #include "esphome/core/hal.h" +#include + namespace esphome { namespace audio { diff --git a/esphome/components/audio/audio_resampler.h b/esphome/components/audio/audio_resampler.h index 7f4e987b4c..082ade3371 100644 --- a/esphome/components/audio/audio_resampler.h +++ b/esphome/components/audio/audio_resampler.h @@ -6,6 +6,7 @@ #include "audio_transfer_buffer.h" #include "esphome/core/defines.h" +#include "esphome/core/helpers.h" #include "esphome/core/ring_buffer.h" #ifdef USE_SPEAKER diff --git a/esphome/components/microphone/__init__.py b/esphome/components/microphone/__init__.py index 4e5471b117..b9d24bc4a7 100644 --- a/esphome/components/microphone/__init__.py +++ b/esphome/components/microphone/__init__.py @@ -1,12 +1,21 @@ from esphome import automation from esphome.automation import maybe_simple_id import esphome.codegen as cg +from esphome.components import audio import esphome.config_validation as cv -from esphome.const import CONF_ID, CONF_TRIGGER_ID +from esphome.const import ( + CONF_BITS_PER_SAMPLE, + CONF_CHANNELS, + CONF_GAIN_FACTOR, + CONF_ID, + CONF_MICROPHONE, + CONF_TRIGGER_ID, +) from esphome.core import CORE from esphome.coroutine import coroutine_with_priority -CODEOWNERS = ["@jesserockz"] +AUTO_LOAD = ["audio"] +CODEOWNERS = ["@jesserockz", "@kahrendt"] IS_PLATFORM_COMPONENT = True @@ -15,6 +24,7 @@ CONF_ON_DATA = "on_data" microphone_ns = cg.esphome_ns.namespace("microphone") Microphone = microphone_ns.class_("Microphone") +MicrophoneSource = microphone_ns.class_("MicrophoneSource") CaptureAction = microphone_ns.class_( "CaptureAction", automation.Action, cg.Parented.template(Microphone) @@ -37,6 +47,7 @@ IsCapturingCondition = microphone_ns.class_( async def setup_microphone_core_(var, config): for conf in config.get(CONF_ON_DATA, []): trigger = cg.new_Pvariable(conf[CONF_TRIGGER_ID], var) + # Future PR will change the vector type to uint8 await automation.build_automation( trigger, [(cg.std_vector.template(cg.int16).operator("ref").operator("const"), "x")], @@ -50,7 +61,7 @@ async def register_microphone(var, config): await setup_microphone_core_(var, config) -MICROPHONE_SCHEMA = cv.Schema( +MICROPHONE_SCHEMA = cv.Schema.extend(audio.AUDIO_COMPONENT_SCHEMA).extend( { cv.Optional(CONF_ON_DATA): automation.validate_automation( { @@ -64,7 +75,104 @@ MICROPHONE_SCHEMA = cv.Schema( MICROPHONE_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Microphone)}) -async def media_player_action(config, action_id, template_arg, args): +def microphone_source_schema( + min_bits_per_sample: int = 16, + max_bits_per_sample: int = 16, + min_channels: int = 1, + max_channels: int = 1, +): + """Schema for a microphone source + + Components requesting microphone data should use this schema instead of accessing a microphone directly. + + Args: + min_bits_per_sample (int, optional): Minimum number of bits per sample the requesting component supports. Defaults to 16. + max_bits_per_sample (int, optional): Maximum number of bits per sample the requesting component supports. Defaults to 16. + min_channels (int, optional): Minimum number of channels the requesting component supports. Defaults to 1. + max_channels (int, optional): Maximum number of channels the requesting component supports. Defaults to 1. + """ + + def _validate_unique_channels(config): + if len(config) != len(set(config)): + raise cv.Invalid("Channels must be unique") + return config + + return cv.All( + cv.maybe_simple_value( + { + cv.GenerateID(CONF_ID): cv.declare_id(MicrophoneSource), + cv.Required(CONF_MICROPHONE): cv.use_id(Microphone), + cv.Optional(CONF_BITS_PER_SAMPLE, default=16): cv.int_range( + min_bits_per_sample, max_bits_per_sample + ), + cv.Optional(CONF_CHANNELS, default="0"): cv.All( + cv.ensure_list(cv.int_range(0, 7)), + cv.Length(min=min_channels, max=max_channels), + _validate_unique_channels, + ), + cv.Optional(CONF_GAIN_FACTOR, default="1"): cv.int_range(1, 64), + }, + key=CONF_MICROPHONE, + ), + ) + + +_UNDEF = object() + + +def final_validate_microphone_source_schema( + component_name: str, sample_rate: int = _UNDEF +): + """Validates that the microphone source can provide audio in the correct format. In particular it validates the sample rate and the enabled channels. + + Note that: + - MicrophoneSource class automatically handles converting bits per sample, so no need to validate + - microphone_source_schema already validates that channels are unique and specifies the max number of channels the component supports + + Args: + component_name (str): The name of the component requesting mic audio + sample_rate (int, optional): The sample rate the component requesting mic audio requires + """ + + def _validate_audio_compatability(config): + if sample_rate is not _UNDEF: + # Issues require changing the microphone configuration + # - Verifies sample rates match + audio.final_validate_audio_schema( + component_name, + audio_device=CONF_MICROPHONE, + sample_rate=sample_rate, + audio_device_issue=True, + )(config) + + # Issues require changing the MicrophoneSource configuration + # - Verifies that each of the enabled channels are available + audio.final_validate_audio_schema( + component_name, + audio_device=CONF_MICROPHONE, + enabled_channels=config[CONF_CHANNELS], + audio_device_issue=False, + )(config) + + return config + + return _validate_audio_compatability + + +async def microphone_source_to_code(config): + mic = await cg.get_variable(config[CONF_MICROPHONE]) + mic_source = cg.new_Pvariable( + config[CONF_ID], + mic, + config[CONF_BITS_PER_SAMPLE], + config[CONF_GAIN_FACTOR], + ) + for channel in config[CONF_CHANNELS]: + cg.add(mic_source.add_channel(channel)) + return mic_source + + +async def microphone_action(config, action_id, template_arg, args): var = cg.new_Pvariable(action_id, template_arg) await cg.register_parented(var, config[CONF_ID]) return var @@ -72,15 +180,15 @@ async def media_player_action(config, action_id, template_arg, args): automation.register_action( "microphone.capture", CaptureAction, MICROPHONE_ACTION_SCHEMA -)(media_player_action) +)(microphone_action) automation.register_action( "microphone.stop_capture", StopCaptureAction, MICROPHONE_ACTION_SCHEMA -)(media_player_action) +)(microphone_action) automation.register_condition( "microphone.is_capturing", IsCapturingCondition, MICROPHONE_ACTION_SCHEMA -)(media_player_action) +)(microphone_action) @coroutine_with_priority(100.0) diff --git a/esphome/components/microphone/microphone.h b/esphome/components/microphone/microphone.h index 914ad80bea..58552aa34a 100644 --- a/esphome/components/microphone/microphone.h +++ b/esphome/components/microphone/microphone.h @@ -1,5 +1,7 @@ #pragma once +#include "esphome/components/audio/audio.h" + #include #include #include @@ -28,9 +30,13 @@ class Microphone { bool is_running() const { return this->state_ == STATE_RUNNING; } bool is_stopped() const { return this->state_ == STATE_STOPPED; } + audio::AudioStreamInfo get_audio_stream_info() { return this->audio_stream_info_; } + protected: State state_{STATE_STOPPED}; + audio::AudioStreamInfo audio_stream_info_; + CallbackManager &)> data_callbacks_{}; }; diff --git a/esphome/components/microphone/microphone_source.cpp b/esphome/components/microphone/microphone_source.cpp new file mode 100644 index 0000000000..7e397348b9 --- /dev/null +++ b/esphome/components/microphone/microphone_source.cpp @@ -0,0 +1,96 @@ +#include "microphone_source.h" + +namespace esphome { +namespace microphone { + +void MicrophoneSource::add_data_callback(std::function &)> &&data_callback) { + std::function &)> filtered_callback = + [this, data_callback](const std::vector &data) { + if (this->enabled_) { + data_callback(this->process_audio_(data)); + } + }; + // Future PR will uncomment this! It requires changing the callback vector to an uint8_t in every component using a + // mic callback. + // this->mic_->add_data_callback(std::move(filtered_callback)); +} + +void MicrophoneSource::start() { + this->enabled_ = true; + this->mic_->start(); +} +void MicrophoneSource::stop() { + this->enabled_ = false; + this->mic_->stop(); +} + +std::vector MicrophoneSource::process_audio_(const std::vector &data) { + // Bit depth conversions are obtained by truncating bits or padding with zeros - no dithering is applied. + + const size_t source_bytes_per_sample = this->mic_->get_audio_stream_info().samples_to_bytes(1); + const size_t source_channels = this->mic_->get_audio_stream_info().get_channels(); + + const size_t source_bytes_per_frame = this->mic_->get_audio_stream_info().frames_to_bytes(1); + + const uint32_t total_frames = this->mic_->get_audio_stream_info().bytes_to_frames(data.size()); + const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8; + const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count(); + + std::vector filtered_data; + filtered_data.reserve(target_bytes_per_frame * total_frames); + + const int32_t target_min_value = -(1 << (8 * target_bytes_per_sample - 1)); + const int32_t target_max_value = (1 << (8 * target_bytes_per_sample - 1)) - 1; + + for (size_t frame_index = 0; frame_index < total_frames; ++frame_index) { + for (size_t channel_index = 0; channel_index < source_channels; ++channel_index) { + if (this->channels_.test(channel_index)) { + // Channel's current sample is included in the target mask. Convert bits per sample, if necessary. + + size_t sample_index = frame_index * source_bytes_per_frame + channel_index * source_bytes_per_sample; + + int32_t sample = 0; + + // Copy the data into the most significant bits of the sample variable to ensure the sign bit is correct + uint8_t bit_offset = (4 - source_bytes_per_sample) * 8; + for (int i = 0; i < source_bytes_per_sample; ++i) { + sample |= data[sample_index + i] << bit_offset; + bit_offset += 8; + } + + // Shift data back to the least significant bits + if (source_bytes_per_sample >= target_bytes_per_sample) { + // Keep source bytes per sample of data so that the gain multiplication uses all significant bits instead of + // shifting to the target bytes per sample immediately, potentially losing information. + sample >>= (4 - source_bytes_per_sample) * 8; // ``source_bytes_per_sample`` bytes of valid data + } else { + // Keep padded zeros to match the target bytes per sample + sample >>= (4 - target_bytes_per_sample) * 8; // ``target_bytes_per_sample`` bytes of valid data + } + + // Apply gain using multiplication + sample *= this->gain_factor_; + + // Match target output bytes by shifting out the least significant bits + if (source_bytes_per_sample > target_bytes_per_sample) { + sample >>= 8 * (source_bytes_per_sample - + target_bytes_per_sample); // ``target_bytes_per_sample`` bytes of valid data + } + + // Clamp ``sample`` to the target bytes per sample range in case gain multiplication overflows + sample = clamp(sample, target_min_value, target_max_value); + + // Copy ``target_bytes_per_sample`` bytes to the output buffer. + for (int i = 0; i < target_bytes_per_sample; ++i) { + filtered_data.push_back(static_cast(sample)); + sample >>= 8; + } + } + } + } + + return filtered_data; +} + +} // namespace microphone +} // namespace esphome diff --git a/esphome/components/microphone/microphone_source.h b/esphome/components/microphone/microphone_source.h new file mode 100644 index 0000000000..028920f101 --- /dev/null +++ b/esphome/components/microphone/microphone_source.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "microphone.h" + +namespace esphome { +namespace microphone { + +class MicrophoneSource { + /* + * @brief Helper class that handles converting raw microphone data to a requested format. + * Components requesting microphone audio should register a callback through this class instead of registering a + * callback directly with the microphone if a particular format is required. + * + * Raw microphone data may have a different number of bits per sample and number of channels than the requesting + * component needs. This class handles the conversion by: + * - Internally adds a callback to receive the raw microphone data + * - The ``process_audio_`` handles the raw data + * - Only the channels set in the ``channels_`` bitset are passed through + * - Passed through samples have the bits per sample converted + * - A gain factor is optionally applied to increase the volume - audio may clip! + * - The processed audio is passed to the callback of the component requesting microphone data + * - It tracks an internal enabled state, so it ignores raw microphone data when the component requesting + * microphone data is not actively requesting audio. + * + * Note that this class cannot convert sample rates! + */ + public: + MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor) + : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {} + + /// @brief Enables a channel to be processed through the callback. + /// + /// If the microphone component only has reads from one channel, it is always in channel number 0, regardless if it + /// represents left or right. If the microphone reads from both left and right, channel number 0 and 1 represent the + /// left and right channels respectively. + /// + /// @param channel 0-indexed channel number to enable + void add_channel(uint8_t channel) { this->channels_.set(channel); } + + void add_data_callback(std::function &)> &&data_callback); + + void start(); + void stop(); + bool is_running() const { return (this->mic_->is_running() && this->enabled_); } + bool is_stopped() const { return !this->enabled_; } + + protected: + std::vector process_audio_(const std::vector &data); + + Microphone *mic_; + uint8_t bits_per_sample_; + std::bitset<8> channels_; + int32_t gain_factor_; + bool enabled_{false}; +}; + +} // namespace microphone +} // namespace esphome