[sound_level] Add a new sound level sensor (#8737)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
Kevin Ahrendt 2025-05-11 15:51:49 -05:00 committed by GitHub
parent 7f59aff157
commit cdc1a7c646
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 429 additions and 1 deletions

View File

@ -398,6 +398,7 @@ esphome/components/smt100/* @piechade
esphome/components/sn74hc165/* @jesserockz
esphome/components/socket/* @esphome/core
esphome/components/sonoff_d1/* @anatoly-savchenkov
esphome/components/sound_level/* @kahrendt
esphome/components/speaker/* @jesserockz @kahrendt
esphome/components/speaker/media_player/* @kahrendt @synesthesiam
esphome/components/spi/* @clydebarrow @esphome/core

View File

@ -59,6 +59,7 @@ class MicrophoneSource {
void start();
void stop();
bool is_passive() const { return this->passive_; }
bool is_running() const { return (this->mic_->is_running() && (this->enabled_ || this->passive_)); }
bool is_stopped() const { return !this->is_running(); };
@ -72,7 +73,7 @@ class MicrophoneSource {
std::bitset<8> channels_;
int32_t gain_factor_;
bool enabled_{false};
bool passive_{false};
bool passive_; // Only pass audio if ``mic_`` is already running
};
} // namespace microphone

View File

@ -0,0 +1,97 @@
from esphome import automation
import esphome.codegen as cg
from esphome.components import microphone, sensor
import esphome.config_validation as cv
from esphome.const import (
CONF_ID,
CONF_MEASUREMENT_DURATION,
CONF_MICROPHONE,
DEVICE_CLASS_SOUND_PRESSURE,
PLATFORM_ESP32,
STATE_CLASS_MEASUREMENT,
UNIT_DECIBEL,
)
# Component metadata read by ESPHome (presumably: components to auto-load,
# maintainers, and components that must be present in the configuration).
AUTOLOAD = ["audio"]
CODEOWNERS = ["@kahrendt"]
DEPENDENCIES = ["microphone"]
# Configuration keys used by this platform that are not imported from esphome.const.
CONF_PASSIVE = "passive"
CONF_PEAK = "peak"
CONF_RMS = "rms"
# Codegen handles for the C++ ``sound_level`` namespace and the classes declared in it.
sound_level_ns = cg.esphome_ns.namespace("sound_level")
SoundLevelComponent = sound_level_ns.class_("SoundLevelComponent", cg.Component)
StartAction = sound_level_ns.class_("StartAction", automation.Action)
StopAction = sound_level_ns.class_("StopAction", automation.Action)
def _decibel_sensor_schema():
    """Build the sensor schema used by both the ``peak`` and ``rms`` options."""
    return sensor.sensor_schema(
        unit_of_measurement=UNIT_DECIBEL,
        accuracy_decimals=1,
        device_class=DEVICE_CLASS_SOUND_PRESSURE,
        state_class=STATE_CLASS_MEASUREMENT,
    )


# The measurement window must be a positive duration between 50 ms and 60 s.
_MEASUREMENT_DURATION_SCHEMA = cv.All(
    cv.positive_time_period_milliseconds,
    cv.Range(
        min=cv.TimePeriod(milliseconds=50),
        max=cv.TimePeriod(seconds=60),
    ),
)

# Only 16-bit samples are accepted from the microphone source.
_MICROPHONE_SOURCE_SCHEMA = microphone.microphone_source_schema(
    min_bits_per_sample=16,
    max_bits_per_sample=16,
)

CONFIG_SCHEMA = cv.All(
    cv.Schema(
        {
            cv.GenerateID(): cv.declare_id(SoundLevelComponent),
            cv.Optional(
                CONF_MEASUREMENT_DURATION, default="1000ms"
            ): _MEASUREMENT_DURATION_SCHEMA,
            cv.Optional(CONF_MICROPHONE, default={}): _MICROPHONE_SOURCE_SCHEMA,
            cv.Required(CONF_PASSIVE): cv.boolean,
            cv.Optional(CONF_PEAK): _decibel_sensor_schema(),
            cv.Optional(CONF_RMS): _decibel_sensor_schema(),
        }
    ).extend(cv.COMPONENT_SCHEMA),
    cv.only_on([PLATFORM_ESP32]),
)
async def to_code(config):
    """Generate the C++ setup code for one validated ``sound_level`` configuration.

    Creates and registers the component, attaches a microphone source built with the
    configured ``passive`` flag, sets the measurement duration, and creates the
    ``peak`` / ``rms`` sensors that are present in ``config``.
    """
    component = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(component, config)

    source = await microphone.microphone_source_to_code(
        config[CONF_MICROPHONE], passive=config[CONF_PASSIVE]
    )
    cg.add(component.set_microphone_source(source))
    cg.add(component.set_measurement_duration(config[CONF_MEASUREMENT_DURATION]))

    # Both level sensors are optional; only wire up the ones that were configured.
    peak_config = config.get(CONF_PEAK)
    if peak_config:
        peak_sensor = await sensor.new_sensor(peak_config)
        cg.add(component.set_peak_sensor(peak_sensor))

    rms_config = config.get(CONF_RMS)
    if rms_config:
        rms_sensor = await sensor.new_sensor(rms_config)
        cg.add(component.set_rms_sensor(rms_sensor))
# Schema shared by the ``sound_level.start`` and ``sound_level.stop`` actions:
# the only option is the ID of the SoundLevelComponent to act on.
SOUND_LEVEL_ACTION_SCHEMA = automation.maybe_simple_id(
{
cv.GenerateID(): cv.use_id(SoundLevelComponent),
}
)
@automation.register_action("sound_level.start", StartAction, SOUND_LEVEL_ACTION_SCHEMA)
@automation.register_action("sound_level.stop", StopAction, SOUND_LEVEL_ACTION_SCHEMA)
async def sound_level_action_to_code(config, action_id, template_arg, args):
    """Generate code for a ``sound_level.start`` / ``sound_level.stop`` action.

    Instantiates the action object and registers the component referenced by
    ``config[CONF_ID]`` as its parent. Returns the created action variable.
    """
    action = cg.new_Pvariable(action_id, template_arg)
    await cg.register_parented(action, config[CONF_ID])
    return action

View File

@ -0,0 +1,194 @@
#include "sound_level.h"
#ifdef USE_ESP32
#include "esphome/core/log.h"
#include <cmath>
#include <cstdint>
namespace esphome {
namespace sound_level {
// Tag used for this component's log output
static const char *const TAG = "sound_level";

// Durations used to size the transfer buffer and the ring buffer, respectively
static constexpr uint32_t AUDIO_BUFFER_DURATION_MS = 30;
static constexpr uint32_t RING_BUFFER_DURATION_MS = 120;

// Full-scale reference for squared samples. INT16_MIN is squared since INT16_MIN^2 > INT16_MAX^2
static constexpr double MAX_SAMPLE_SQUARED_DENOMINATOR =
    static_cast<double>(INT16_MIN) * static_cast<double>(INT16_MIN);
/// Logs the component's configuration: the measurement window and the attached peak/RMS sensors.
void SoundLevelComponent::dump_config() {
  const uint32_t window_ms = this->measurement_duration_ms_;

  ESP_LOGCONFIG(TAG, "Sound Level Component:");
  ESP_LOGCONFIG(TAG, " Measurement Duration: %" PRIu32 " ms", window_ms);
  LOG_SENSOR(" ", "Peak:", this->peak_sensor_);
  LOG_SENSOR(" ", "RMS:", this->rms_sensor_);
}
/// Registers the audio data callback on the microphone source and, unless the source is passive, starts it.
void SoundLevelComponent::setup() {
  this->microphone_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
    std::shared_ptr<RingBuffer> locked_ring_buffer = this->ring_buffer_.lock();
    if (this->ring_buffer_.use_count() != 2) {
      // Writing is only safe/useful while ``audio_buffer_`` and ``locked_ring_buffer`` share ownership of a ring
      // buffer; any other owner count means there is nothing to write into
      return;
    }
    locked_ring_buffer->write(const_cast<uint8_t *>(data.data()), data.size());
  });

  if (this->microphone_source_->is_passive()) {
    // In passive mode, this component never starts the microphone itself
    return;
  }

  this->microphone_source_->start();
}
/// Main loop step. While the microphone source is running, keeps the buffers allocated, accumulates squared-sample
/// statistics from newly available audio, and publishes the peak/RMS levels in dB once a full measurement window of
/// samples has been processed. When the source is not running, the buffers are released and the sensors publish NAN.
void SoundLevelComponent::loop() {
if ((this->peak_sensor_ == nullptr) && (this->rms_sensor_ == nullptr)) {
// No sensors configured, nothing to do
return;
}
if (this->microphone_source_->is_running() && !this->status_has_error()) {
// Allocate buffers
if (this->start_()) {
this->status_clear_warning();
}
} else {
// The warning flag doubles as an "already cleaned up" marker, so the teardown below runs once per transition
if (!this->status_has_warning()) {
this->status_set_warning("Microphone isn't running, can't compute statistics");
// Deallocate buffers, if necessary
this->stop_();
// Reset sensor outputs
if (this->peak_sensor_ != nullptr) {
this->peak_sensor_->publish_state(NAN);
}
if (this->rms_sensor_ != nullptr) {
this->rms_sensor_->publish_state(NAN);
}
// Reset accumulators
this->squared_peak_ = 0;
this->squared_samples_sum_ = 0;
this->sample_count_ = 0;
}
return;
}
// ``start_()`` reports allocation failures via ``status_momentary_error``; bail out before touching ``audio_buffer_``
if (this->status_has_error()) {
return;
}
// Copy data from ring buffer into the transfer buffer - don't block to avoid slowing the main loop
this->audio_buffer_->transfer_data_from_source(0);
if (this->audio_buffer_->available() == 0) {
// No new audio available for processing
return;
}
const uint32_t samples_in_window =
this->microphone_source_->get_audio_stream_info().ms_to_samples(this->measurement_duration_ms_);
const uint32_t samples_available_to_process =
this->microphone_source_->get_audio_stream_info().bytes_to_samples(this->audio_buffer_->available());
// Never consume past the end of the current window; leftover samples stay in ``audio_buffer_`` for the next call
// NOTE(review): the unsigned subtraction assumes ``sample_count_`` <= ``samples_in_window`` - confirm this holds if
// the stream info can change in the middle of a window
const uint32_t samples_to_process = std::min(samples_in_window - this->sample_count_, samples_available_to_process);
// MicrophoneSource always provides int16 samples due to Python codegen settings
const int16_t *audio_data = reinterpret_cast<const int16_t *>(this->audio_buffer_->get_buffer_start());
// Process all the new audio samples
for (uint32_t i = 0; i < samples_to_process; ++i) {
// Squaring int16 samples won't overflow an int32
int32_t squared_sample = static_cast<int32_t>(audio_data[i]) * static_cast<int32_t>(audio_data[i]);
if (this->peak_sensor_ != nullptr) {
this->squared_peak_ = std::max(this->squared_peak_, squared_sample);
}
if (this->rms_sensor_ != nullptr) {
// Squared sum is an uint64 type - at max levels, an uint32 type would overflow after ~8 samples
this->squared_samples_sum_ += squared_sample;
}
++this->sample_count_;
}
// Remove the processed samples from ``audio_buffer_``
this->audio_buffer_->decrease_buffer_length(
this->microphone_source_->get_audio_stream_info().samples_to_bytes(samples_to_process));
if (this->sample_count_ == samples_in_window) {
// Processed enough samples for the measurement window, compute and publish the sensor values
if (this->peak_sensor_ != nullptr) {
// 10*log10 of the squared ratio gives the level in dB relative to the largest possible squared sample
const float peak_db = 10.0f * log10(static_cast<float>(this->squared_peak_) / MAX_SAMPLE_SQUARED_DENOMINATOR);
this->peak_sensor_->publish_state(peak_db);
this->squared_peak_ = 0; // reset accumulator
}
if (this->rms_sensor_ != nullptr) {
// Calculations are done with doubles instead of floats - floats lose precision for even modest window durations
const double rms_db = 10.0 * log10((this->squared_samples_sum_ / MAX_SAMPLE_SQUARED_DENOMINATOR) /
static_cast<double>(samples_in_window));
this->rms_sensor_->publish_state(rms_db);
this->squared_samples_sum_ = 0; // reset accumulator
}
this->sample_count_ = 0; // reset counter
}
}
/// Starts the microphone source. In passive mode, nothing is started and a warning is logged instead.
void SoundLevelComponent::start() {
  if (!this->microphone_source_->is_passive()) {
    this->microphone_source_->start();
    return;
  }

  ESP_LOGW(TAG, "Can't start the microphone in passive mode");
}
/// Stops the microphone source. In passive mode, nothing is stopped and a warning is logged instead.
void SoundLevelComponent::stop() {
  if (!this->microphone_source_->is_passive()) {
    this->microphone_source_->stop();
    return;
  }

  ESP_LOGW(TAG, "Can't stop microphone in passive mode");
}
bool SoundLevelComponent::start_() {
if (this->audio_buffer_ != nullptr) {
return true;
}
// Allocate a transfer buffer
this->audio_buffer_ = audio::AudioSourceTransferBuffer::create(
this->microphone_source_->get_audio_stream_info().ms_to_bytes(AUDIO_BUFFER_DURATION_MS));
if (this->audio_buffer_ == nullptr) {
this->status_momentary_error("Failed to allocate transfer buffer", 15000);
return false;
}
// Allocates a new ring buffer, adds it as a source for the transfer buffer, and points ring_buffer_ to it
this->ring_buffer_.reset(); // Reset pointer to any previous ring buffer allocation
std::shared_ptr<RingBuffer> temp_ring_buffer =
RingBuffer::create(this->microphone_source_->get_audio_stream_info().ms_to_bytes(RING_BUFFER_DURATION_MS));
if (temp_ring_buffer.use_count() == 0) {
this->status_momentary_error("Failed to allocate ring buffer", 15000);
this->stop_();
return false;
} else {
this->ring_buffer_ = temp_ring_buffer;
this->audio_buffer_->set_source(temp_ring_buffer);
}
this->status_clear_error();
return true;
}
/// Releases ``audio_buffer_``.
void SoundLevelComponent::stop_() {
  this->audio_buffer_ = nullptr;
}
} // namespace sound_level
} // namespace esphome
#endif

View File

@ -0,0 +1,73 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/components/microphone/microphone_source.h"
#include "esphome/components/sensor/sensor.h"
#include "esphome/core/component.h"
#include "esphome/core/ring_buffer.h"
namespace esphome {
namespace sound_level {
/// @brief Component that measures the peak and/or RMS sound level (in dB) of audio from a MicrophoneSource over a
/// configurable measurement window and publishes the results to optional sensors.
class SoundLevelComponent : public Component {
 public:
  void dump_config() override;
  void setup() override;
  void loop() override;

  float get_setup_priority() const override { return setup_priority::AFTER_CONNECTION; }

  /// @brief Sets the length of the measurement window in milliseconds
  void set_measurement_duration(uint32_t measurement_duration_ms) {
    this->measurement_duration_ms_ = measurement_duration_ms;
  }
  /// @brief Sets the microphone source that provides the audio samples
  void set_microphone_source(microphone::MicrophoneSource *microphone_source) {
    this->microphone_source_ = microphone_source;
  }
  /// @brief Sets the (optional) sensor that receives the peak level
  void set_peak_sensor(sensor::Sensor *peak_sensor) { this->peak_sensor_ = peak_sensor; }
  /// @brief Sets the (optional) sensor that receives the RMS level
  void set_rms_sensor(sensor::Sensor *rms_sensor) { this->rms_sensor_ = rms_sensor; }

  /// @brief Starts the MicrophoneSource to start measuring sound levels
  void start();

  /// @brief Stops the MicrophoneSource
  void stop();

 protected:
  /// @brief Internal start command that, if necessary, allocates ``audio_buffer_`` and a ring buffer which
  /// ``audio_buffer_`` owns and ``ring_buffer_`` points to. Returns true if allocations were successful.
  bool start_();

  /// @brief Internal stop command that deallocates ``audio_buffer_`` (which automatically deallocates its ring buffer)
  void stop_();

  microphone::MicrophoneSource *microphone_source_{nullptr};

  sensor::Sensor *peak_sensor_{nullptr};
  sensor::Sensor *rms_sensor_{nullptr};

  std::unique_ptr<audio::AudioSourceTransferBuffer> audio_buffer_;
  std::weak_ptr<RingBuffer> ring_buffer_;

  // Accumulators for the current measurement window
  int32_t squared_peak_{0};
  uint64_t squared_samples_sum_{0};
  uint32_t sample_count_{0};

  // Initialized so the window length is never indeterminate if ``set_measurement_duration`` isn't called; the value
  // matches the ``1000ms`` default of the configuration schema
  uint32_t measurement_duration_ms_{1000};
};
/// @brief Automation action that calls ``start()`` on its parent SoundLevelComponent
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<SoundLevelComponent> {
 public:
  void play(Ts... x) override {
    auto *component = this->parent_;
    component->start();
  }
};
/// @brief Automation action that calls ``stop()`` on its parent SoundLevelComponent
template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<SoundLevelComponent> {
 public:
  void play(Ts... x) override {
    auto *component = this->parent_;
    component->stop();
  }
};
} // namespace sound_level
} // namespace esphome
#endif

View File

@ -0,0 +1,26 @@
# Shared test configuration for the sound_level sensor platform.
# The pin substitutions are supplied by the file that includes this one.
i2s_audio:
  # Each clock pin uses the substitution of the same name
  i2s_lrclk_pin: ${i2s_lrclk_pin}
  i2s_bclk_pin: ${i2s_bclk_pin}

microphone:
  - platform: i2s_audio
    id: i2s_microphone
    i2s_din_pin: ${i2s_dout_pin}
    adc_type: external
    bits_per_sample: 16bit

sensor:
  - platform: sound_level
    microphone: i2s_microphone
    measurement_duration: 2000ms
    passive: false
    peak:
      name: "Peak Sound Level"
      # Exercises the sound_level.stop / sound_level.start actions
      on_value_range:
        - above: -1.0
          then:
            - sound_level.stop:
            - delay: 5s
            - sound_level.start:
    rms:
      name: "RMS Sound Level"

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO25
i2s_lrclk_pin: GPIO26
i2s_dout_pin: GPIO27
# Merge in the shared sound_level test configuration
<<: !include common.yaml

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO6
i2s_lrclk_pin: GPIO7
i2s_dout_pin: GPIO8
# Merge in the shared sound_level test configuration
<<: !include common.yaml

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO6
i2s_lrclk_pin: GPIO7
i2s_dout_pin: GPIO8
# Merge in the shared sound_level test configuration
<<: !include common.yaml

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO25
i2s_lrclk_pin: GPIO26
i2s_dout_pin: GPIO27
# Merge in the shared sound_level test configuration
<<: !include common.yaml

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO4
i2s_lrclk_pin: GPIO5
i2s_dout_pin: GPIO6
# Merge in the shared sound_level test configuration
<<: !include common.yaml

View File

@ -0,0 +1,6 @@
# I2S pin assignments substituted into the shared configuration included below.
substitutions:
i2s_bclk_pin: GPIO4
i2s_lrclk_pin: GPIO5
i2s_dout_pin: GPIO6
# Merge in the shared sound_level test configuration
<<: !include common.yaml